1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 /* SPDX-License-Identifier: GPL-2.0-or-later */ #ifndef _ASM_X86_INAT_H #define _ASM_X86_INAT_H /* * x86 instruction attributes * * Written by Masami Hiramatsu <mhiramat@redhat.com> */ #include <asm/inat_types.h> /* * Internal bits. Don't use bitmasks directly, because these bits are * unstable. You should use checking functions. */ #define INAT_OPCODE_TABLE_SIZE 256 #define INAT_GROUP_TABLE_SIZE 8 /* Legacy last prefixes */ #define INAT_PFX_OPNDSZ 1 /* 0x66 */ /* LPFX1 */ #define INAT_PFX_REPE 2 /* 0xF3 */ /* LPFX2 */ #define INAT_PFX_REPNE 3 /* 0xF2 */ /* LPFX3 */ /* Other Legacy prefixes */ #define INAT_PFX_LOCK 4 /* 0xF0 */ #define INAT_PFX_CS 5 /* 0x2E */ #define INAT_PFX_DS 6 /* 0x3E */ #define INAT_PFX_ES 7 /* 0x26 */ #define INAT_PFX_FS 8 /* 0x64 */ #define INAT_PFX_GS 9 /* 0x65 */ #define INAT_PFX_SS 10 /* 0x36 */ #define INAT_PFX_ADDRSZ 11 /* 0x67 */ /* x86-64 REX prefix */ #define INAT_PFX_REX 12 /* 0x4X */ /* AVX VEX prefixes */ #define INAT_PFX_VEX2 13 /* 2-bytes VEX prefix */ #define INAT_PFX_VEX3 14 /* 3-bytes VEX prefix */ #define INAT_PFX_EVEX 15 /* EVEX prefix */ #define INAT_LSTPFX_MAX 3 #define INAT_LGCPFX_MAX 11 /* Immediate size */ #define INAT_IMM_BYTE 1 #define INAT_IMM_WORD 2 #define INAT_IMM_DWORD 3 #define INAT_IMM_QWORD 4 #define INAT_IMM_PTR 5 #define INAT_IMM_VWORD32 6 #define INAT_IMM_VWORD 7 /* Legacy prefix */ #define INAT_PFX_OFFS 0 #define INAT_PFX_BITS 4 #define INAT_PFX_MAX ((1 << INAT_PFX_BITS) - 1) #define INAT_PFX_MASK (INAT_PFX_MAX << INAT_PFX_OFFS) /* Escape opcodes */ #define INAT_ESC_OFFS (INAT_PFX_OFFS + INAT_PFX_BITS) #define INAT_ESC_BITS 2 #define INAT_ESC_MAX ((1 << INAT_ESC_BITS) - 1) #define INAT_ESC_MASK (INAT_ESC_MAX << INAT_ESC_OFFS) /* Group opcodes (1-16) */ #define INAT_GRP_OFFS (INAT_ESC_OFFS + INAT_ESC_BITS) #define INAT_GRP_BITS 5 #define INAT_GRP_MAX ((1 << INAT_GRP_BITS) - 1) #define INAT_GRP_MASK (INAT_GRP_MAX << INAT_GRP_OFFS) /* Immediates */ #define INAT_IMM_OFFS (INAT_GRP_OFFS + INAT_GRP_BITS) #define INAT_IMM_BITS 3 #define INAT_IMM_MASK (((1 << INAT_IMM_BITS) - 1) << INAT_IMM_OFFS) /* Flags */ #define INAT_FLAG_OFFS (INAT_IMM_OFFS + INAT_IMM_BITS) #define INAT_MODRM (1 << (INAT_FLAG_OFFS)) #define INAT_FORCE64 (1 << (INAT_FLAG_OFFS + 1)) #define INAT_SCNDIMM (1 << (INAT_FLAG_OFFS + 2)) #define INAT_MOFFSET (1 << (INAT_FLAG_OFFS + 3)) #define INAT_VARIANT (1 << (INAT_FLAG_OFFS + 4)) #define INAT_VEXOK (1 << (INAT_FLAG_OFFS + 5)) #define INAT_VEXONLY (1 << (INAT_FLAG_OFFS + 6)) #define INAT_EVEXONLY (1 << (INAT_FLAG_OFFS + 7)) /* Attribute making macros for attribute tables */ #define INAT_MAKE_PREFIX(pfx) (pfx << INAT_PFX_OFFS) #define INAT_MAKE_ESCAPE(esc) (esc << INAT_ESC_OFFS) #define INAT_MAKE_GROUP(grp) ((grp << INAT_GRP_OFFS) | INAT_MODRM) #define INAT_MAKE_IMM(imm) (imm << INAT_IMM_OFFS) /* Identifiers for segment registers */ #define INAT_SEG_REG_IGNORE 0 #define INAT_SEG_REG_DEFAULT 1 #define INAT_SEG_REG_CS 2 #define INAT_SEG_REG_SS 3 #define INAT_SEG_REG_DS 4 #define INAT_SEG_REG_ES 5 #define INAT_SEG_REG_FS 6 #define INAT_SEG_REG_GS 7 /* Attribute search APIs */ extern insn_attr_t inat_get_opcode_attribute(insn_byte_t opcode); extern int inat_get_last_prefix_id(insn_byte_t last_pfx); extern insn_attr_t inat_get_escape_attribute(insn_byte_t opcode, int lpfx_id, insn_attr_t esc_attr); extern insn_attr_t inat_get_group_attribute(insn_byte_t modrm, int lpfx_id, insn_attr_t esc_attr); extern insn_attr_t inat_get_avx_attribute(insn_byte_t opcode, insn_byte_t vex_m, insn_byte_t vex_pp); /* Attribute checking functions */ static inline int inat_is_legacy_prefix(insn_attr_t attr) { attr &= INAT_PFX_MASK; return attr && attr <= INAT_LGCPFX_MAX; } static inline int inat_is_address_size_prefix(insn_attr_t attr) { return (attr & INAT_PFX_MASK) == INAT_PFX_ADDRSZ; } static inline int inat_is_operand_size_prefix(insn_attr_t attr) { return (attr & INAT_PFX_MASK) == INAT_PFX_OPNDSZ; } static inline int inat_is_rex_prefix(insn_attr_t attr) { return (attr & INAT_PFX_MASK) == INAT_PFX_REX; } static inline int inat_last_prefix_id(insn_attr_t attr) { if ((attr & INAT_PFX_MASK) > INAT_LSTPFX_MAX) return 0; else return attr & INAT_PFX_MASK; } static inline int inat_is_vex_prefix(insn_attr_t attr) { attr &= INAT_PFX_MASK; return attr == INAT_PFX_VEX2 || attr == INAT_PFX_VEX3 || attr == INAT_PFX_EVEX; } static inline int inat_is_evex_prefix(insn_attr_t attr) { return (attr & INAT_PFX_MASK) == INAT_PFX_EVEX; } static inline int inat_is_vex3_prefix(insn_attr_t attr) { return (attr & INAT_PFX_MASK) == INAT_PFX_VEX3; } static inline int inat_is_escape(insn_attr_t attr) { return attr & INAT_ESC_MASK; } static inline int inat_escape_id(insn_attr_t attr) { return (attr & INAT_ESC_MASK) >> INAT_ESC_OFFS; } static inline int inat_is_group(insn_attr_t attr) { return attr & INAT_GRP_MASK; } static inline int inat_group_id(insn_attr_t attr) { return (attr & INAT_GRP_MASK) >> INAT_GRP_OFFS; } static inline int inat_group_common_attribute(insn_attr_t attr) { return attr & ~INAT_GRP_MASK; } static inline int inat_has_immediate(insn_attr_t attr) { return attr & INAT_IMM_MASK; } static inline int inat_immediate_size(insn_attr_t attr) { return (attr & INAT_IMM_MASK) >> INAT_IMM_OFFS; } static inline int inat_has_modrm(insn_attr_t attr) { return attr & INAT_MODRM; } static inline int inat_is_force64(insn_attr_t attr) { return attr & INAT_FORCE64; } static inline int inat_has_second_immediate(insn_attr_t attr) { return attr & INAT_SCNDIMM; } static inline int inat_has_moffset(insn_attr_t attr) { return attr & INAT_MOFFSET; } static inline int inat_has_variant(insn_attr_t attr) { return attr & INAT_VARIANT; } static inline int inat_accept_vex(insn_attr_t attr) { return attr & INAT_VEXOK; } static inline int inat_must_vex(insn_attr_t attr) { return attr & (INAT_VEXONLY | INAT_EVEXONLY); } static inline int inat_must_evex(insn_attr_t attr) { return attr & INAT_EVEXONLY; } #endif
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 /* Copyright (C) 2016 Jason A. Donenfeld <Jason@zx2c4.com>. All Rights Reserved. * * This file is provided under a dual BSD/GPLv2 license. * * SipHash: a fast short-input PRF * https://131002.net/siphash/ * * This implementation is specifically for SipHash2-4 for a secure PRF * and HalfSipHash1-3/SipHash1-3 for an insecure PRF only suitable for * hashtables. */ #ifndef _LINUX_SIPHASH_H #define _LINUX_SIPHASH_H #include <linux/types.h> #include <linux/kernel.h> #define SIPHASH_ALIGNMENT __alignof__(u64) typedef struct { u64 key[2]; } siphash_key_t; static inline bool siphash_key_is_zero(const siphash_key_t *key) { return !(key->key[0] | key->key[1]); } u64 __siphash_aligned(const void *data, size_t len, const siphash_key_t *key); #ifndef CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS u64 __siphash_unaligned(const void *data, size_t len, const siphash_key_t *key); #endif u64 siphash_1u64(const u64 a, const siphash_key_t *key); u64 siphash_2u64(const u64 a, const u64 b, const siphash_key_t *key); u64 siphash_3u64(const u64 a, const u64 b, const u64 c, const siphash_key_t *key); u64 siphash_4u64(const u64 a, const u64 b, const u64 c, const u64 d, const siphash_key_t *key); u64 siphash_1u32(const u32 a, const siphash_key_t *key); u64 siphash_3u32(const u32 a, const u32 b, const u32 c, const siphash_key_t *key); static inline u64 siphash_2u32(const u32 a, const u32 b, const siphash_key_t *key) { return siphash_1u64((u64)b << 32 | a, key); } static inline u64 siphash_4u32(const u32 a, const u32 b, const u32 c, const u32 d, const siphash_key_t *key) { return siphash_2u64((u64)b << 32 | a, (u64)d << 32 | c, key); } static inline u64 ___siphash_aligned(const __le64 *data, size_t len, const siphash_key_t *key) { if (__builtin_constant_p(len) && len == 4) return siphash_1u32(le32_to_cpup((const __le32 *)data), key); if (__builtin_constant_p(len) && len == 8) return siphash_1u64(le64_to_cpu(data[0]), key); if (__builtin_constant_p(len) && len == 16) return siphash_2u64(le64_to_cpu(data[0]), le64_to_cpu(data[1]), key); if (__builtin_constant_p(len) && len == 24) return siphash_3u64(le64_to_cpu(data[0]), le64_to_cpu(data[1]), le64_to_cpu(data[2]), key); if (__builtin_constant_p(len) && len == 32) return siphash_4u64(le64_to_cpu(data[0]), le64_to_cpu(data[1]), le64_to_cpu(data[2]), le64_to_cpu(data[3]), key); return __siphash_aligned(data, len, key); } /** * siphash - compute 64-bit siphash PRF value * @data: buffer to hash * @size: size of @data * @key: the siphash key */ static inline u64 siphash(const void *data, size_t len, const siphash_key_t *key) { #ifndef CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS if (!IS_ALIGNED((unsigned long)data, SIPHASH_ALIGNMENT)) return __siphash_unaligned(data, len, key); #endif return ___siphash_aligned(data, len, key); } #define HSIPHASH_ALIGNMENT __alignof__(unsigned long) typedef struct { unsigned long key[2]; } hsiphash_key_t; u32 __hsiphash_aligned(const void *data, size_t len, const hsiphash_key_t *key); #ifndef CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS u32 __hsiphash_unaligned(const void *data, size_t len, const hsiphash_key_t *key); #endif u32 hsiphash_1u32(const u32 a, const hsiphash_key_t *key); u32 hsiphash_2u32(const u32 a, const u32 b, const hsiphash_key_t *key); u32 hsiphash_3u32(const u32 a, const u32 b, const u32 c, const hsiphash_key_t *key); u32 hsiphash_4u32(const u32 a, const u32 b, const u32 c, const u32 d, const hsiphash_key_t *key); static inline u32 ___hsiphash_aligned(const __le32 *data, size_t len, const hsiphash_key_t *key) { if (__builtin_constant_p(len) && len == 4) return hsiphash_1u32(le32_to_cpu(data[0]), key); if (__builtin_constant_p(len) && len == 8) return hsiphash_2u32(le32_to_cpu(data[0]), le32_to_cpu(data[1]), key); if (__builtin_constant_p(len) && len == 12) return hsiphash_3u32(le32_to_cpu(data[0]), le32_to_cpu(data[1]), le32_to_cpu(data[2]), key); if (__builtin_constant_p(len) && len == 16) return hsiphash_4u32(le32_to_cpu(data[0]), le32_to_cpu(data[1]), le32_to_cpu(data[2]), le32_to_cpu(data[3]), key); return __hsiphash_aligned(data, len, key); } /** * hsiphash - compute 32-bit hsiphash PRF value * @data: buffer to hash * @size: size of @data * @key: the hsiphash key */ static inline u32 hsiphash(const void *data, size_t len, const hsiphash_key_t *key) { #ifndef CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS if (!IS_ALIGNED((unsigned long)data, HSIPHASH_ALIGNMENT)) return __hsiphash_unaligned(data, len, key); #endif return ___hsiphash_aligned(data, len, key); } #endif /* _LINUX_SIPHASH_H */
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 /* SPDX-License-Identifier: GPL-2.0 */ #ifndef __ASM_GENERIC_PGALLOC_H #define __ASM_GENERIC_PGALLOC_H #ifdef CONFIG_MMU #define GFP_PGTABLE_KERNEL (GFP_KERNEL | __GFP_ZERO) #define GFP_PGTABLE_USER (GFP_PGTABLE_KERNEL | __GFP_ACCOUNT) /** * __pte_alloc_one_kernel - allocate a page for PTE-level kernel page table * @mm: the mm_struct of the current context * * This function is intended for architectures that need * anything beyond simple page allocation. * * Return: pointer to the allocated memory or %NULL on error */ static inline pte_t *__pte_alloc_one_kernel(struct mm_struct *mm) { return (pte_t *)__get_free_page(GFP_PGTABLE_KERNEL); } #ifndef __HAVE_ARCH_PTE_ALLOC_ONE_KERNEL /** * pte_alloc_one_kernel - allocate a page for PTE-level kernel page table * @mm: the mm_struct of the current context * * Return: pointer to the allocated memory or %NULL on error */ static inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm) { return __pte_alloc_one_kernel(mm); } #endif /** * pte_free_kernel - free PTE-level kernel page table page * @mm: the mm_struct of the current context * @pte: pointer to the memory containing the page table */ static inline void pte_free_kernel(struct mm_struct *mm, pte_t *pte) { free_page((unsigned long)pte); } /** * __pte_alloc_one - allocate a page for PTE-level user page table * @mm: the mm_struct of the current context * @gfp: GFP flags to use for the allocation * * Allocates a page and runs the pgtable_pte_page_ctor(). * * This function is intended for architectures that need * anything beyond simple page allocation or must have custom GFP flags. * * Return: `struct page` initialized as page table or %NULL on error */ static inline pgtable_t __pte_alloc_one(struct mm_struct *mm, gfp_t gfp) { struct page *pte; pte = alloc_page(gfp); if (!pte) return NULL; if (!pgtable_pte_page_ctor(pte)) { __free_page(pte); return NULL; } return pte; } #ifndef __HAVE_ARCH_PTE_ALLOC_ONE /** * pte_alloc_one - allocate a page for PTE-level user page table * @mm: the mm_struct of the current context * * Allocates a page and runs the pgtable_pte_page_ctor(). * * Return: `struct page` initialized as page table or %NULL on error */ static inline pgtable_t pte_alloc_one(struct mm_struct *mm) { return __pte_alloc_one(mm, GFP_PGTABLE_USER); } #endif /* * Should really implement gc for free page table pages. This could be * done with a reference count in struct page. */ /** * pte_free - free PTE-level user page table page * @mm: the mm_struct of the current context * @pte_page: the `struct page` representing the page table */ static inline void pte_free(struct mm_struct *mm, struct page *pte_page) { pgtable_pte_page_dtor(pte_page); __free_page(pte_page); } #if CONFIG_PGTABLE_LEVELS > 2 #ifndef __HAVE_ARCH_PMD_ALLOC_ONE /** * pmd_alloc_one - allocate a page for PMD-level page table * @mm: the mm_struct of the current context * * Allocates a page and runs the pgtable_pmd_page_ctor(). * Allocations use %GFP_PGTABLE_USER in user context and * %GFP_PGTABLE_KERNEL in kernel context. * * Return: pointer to the allocated memory or %NULL on error */ static inline pmd_t *pmd_alloc_one(struct mm_struct *mm, unsigned long addr) { struct page *page; gfp_t gfp = GFP_PGTABLE_USER; if (mm == &init_mm) gfp = GFP_PGTABLE_KERNEL; page = alloc_pages(gfp, 0); if (!page) return NULL; if (!pgtable_pmd_page_ctor(page)) { __free_pages(page, 0); return NULL; } return (pmd_t *)page_address(page); } #endif #ifndef __HAVE_ARCH_PMD_FREE static inline void pmd_free(struct mm_struct *mm, pmd_t *pmd) { BUG_ON((unsigned long)pmd & (PAGE_SIZE-1)); pgtable_pmd_page_dtor(virt_to_page(pmd)); free_page((unsigned long)pmd); } #endif #endif /* CONFIG_PGTABLE_LEVELS > 2 */ #if CONFIG_PGTABLE_LEVELS > 3 #ifndef __HAVE_ARCH_PUD_ALLOC_ONE /** * pud_alloc_one - allocate a page for PUD-level page table * @mm: the mm_struct of the current context * * Allocates a page using %GFP_PGTABLE_USER for user context and * %GFP_PGTABLE_KERNEL for kernel context. * * Return: pointer to the allocated memory or %NULL on error */ static inline pud_t *pud_alloc_one(struct mm_struct *mm, unsigned long addr) { gfp_t gfp = GFP_PGTABLE_USER; if (mm == &init_mm) gfp = GFP_PGTABLE_KERNEL; return (pud_t *)get_zeroed_page(gfp); } #endif static inline void pud_free(struct mm_struct *mm, pud_t *pud) { BUG_ON((unsigned long)pud & (PAGE_SIZE-1)); free_page((unsigned long)pud); } #endif /* CONFIG_PGTABLE_LEVELS > 3 */ #ifndef __HAVE_ARCH_PGD_FREE static inline void pgd_free(struct mm_struct *mm, pgd_t *pgd) { free_page((unsigned long)pgd); } #endif #endif /* CONFIG_MMU */ #endif /* __ASM_GENERIC_PGALLOC_H */
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 /* SPDX-License-Identifier: GPL-2.0 */ /* * bvec iterator * * Copyright (C) 2001 Ming Lei <ming.lei@canonical.com> */ #ifndef __LINUX_BVEC_ITER_H #define __LINUX_BVEC_ITER_H #include <linux/bug.h> #include <linux/errno.h> #include <linux/limits.h> #include <linux/minmax.h> #include <linux/mm.h> #include <linux/types.h> struct page; /** * struct bio_vec - a contiguous range of physical memory addresses * @bv_page: First page associated with the address range. * @bv_len: Number of bytes in the address range. * @bv_offset: Start of the address range relative to the start of @bv_page. * * The following holds for a bvec if n * PAGE_SIZE < bv_offset + bv_len: * * nth_page(@bv_page, n) == @bv_page + n * * This holds because page_is_mergeable() checks the above property. */ struct bio_vec { struct page *bv_page; unsigned int bv_len; unsigned int bv_offset; }; struct bvec_iter { sector_t bi_sector; /* device address in 512 byte sectors */ unsigned int bi_size; /* residual I/O count */ unsigned int bi_idx; /* current index into bvl_vec */ unsigned int bi_bvec_done; /* number of bytes completed in current bvec */ }; struct bvec_iter_all { struct bio_vec bv; int idx; unsigned done; }; /* * various member access, note that bio_data should of course not be used * on highmem page vectors */ #define __bvec_iter_bvec(bvec, iter) (&(bvec)[(iter).bi_idx]) /* multi-page (mp_bvec) helpers */ #define mp_bvec_iter_page(bvec, iter) \ (__bvec_iter_bvec((bvec), (iter))->bv_page) #define mp_bvec_iter_len(bvec, iter) \ min((iter).bi_size, \ __bvec_iter_bvec((bvec), (iter))->bv_len - (iter).bi_bvec_done) #define mp_bvec_iter_offset(bvec, iter) \ (__bvec_iter_bvec((bvec), (iter))->bv_offset + (iter).bi_bvec_done) #define mp_bvec_iter_page_idx(bvec, iter) \ (mp_bvec_iter_offset((bvec), (iter)) / PAGE_SIZE) #define mp_bvec_iter_bvec(bvec, iter) \ ((struct bio_vec) { \ .bv_page = mp_bvec_iter_page((bvec), (iter)), \ .bv_len = mp_bvec_iter_len((bvec), (iter)), \ .bv_offset = mp_bvec_iter_offset((bvec), (iter)), \ }) /* For building single-page bvec in flight */ #define bvec_iter_offset(bvec, iter) \ (mp_bvec_iter_offset((bvec), (iter)) % PAGE_SIZE) #define bvec_iter_len(bvec, iter) \ min_t(unsigned, mp_bvec_iter_len((bvec), (iter)), \ PAGE_SIZE - bvec_iter_offset((bvec), (iter))) #define bvec_iter_page(bvec, iter) \ (mp_bvec_iter_page((bvec), (iter)) + \ mp_bvec_iter_page_idx((bvec), (iter))) #define bvec_iter_bvec(bvec, iter) \ ((struct bio_vec) { \ .bv_page = bvec_iter_page((bvec), (iter)), \ .bv_len = bvec_iter_len((bvec), (iter)), \ .bv_offset = bvec_iter_offset((bvec), (iter)), \ }) static inline bool bvec_iter_advance(const struct bio_vec *bv, struct bvec_iter *iter, unsigned bytes) { unsigned int idx = iter->bi_idx; if (WARN_ONCE(bytes > iter->bi_size, "Attempted to advance past end of bvec iter\n")) { iter->bi_size = 0; return false; } iter->bi_size -= bytes; bytes += iter->bi_bvec_done; while (bytes && bytes >= bv[idx].bv_len) { bytes -= bv[idx].bv_len; idx++; } iter->bi_idx = idx; iter->bi_bvec_done = bytes; return true; } static inline void bvec_iter_skip_zero_bvec(struct bvec_iter *iter) { iter->bi_bvec_done = 0; iter->bi_idx++; } #define for_each_bvec(bvl, bio_vec, iter, start) \ for (iter = (start); \ (iter).bi_size && \ ((bvl = bvec_iter_bvec((bio_vec), (iter))), 1); \ (bvl).bv_len ? (void)bvec_iter_advance((bio_vec), &(iter), \ (bvl).bv_len) : bvec_iter_skip_zero_bvec(&(iter))) /* for iterating one bio from start to end */ #define BVEC_ITER_ALL_INIT (struct bvec_iter) \ { \ .bi_sector = 0, \ .bi_size = UINT_MAX, \ .bi_idx = 0, \ .bi_bvec_done = 0, \ } static inline struct bio_vec *bvec_init_iter_all(struct bvec_iter_all *iter_all) { iter_all->done = 0; iter_all->idx = 0; return &iter_all->bv; } static inline void bvec_advance(const struct bio_vec *bvec, struct bvec_iter_all *iter_all) { struct bio_vec *bv = &iter_all->bv; if (iter_all->done) { bv->bv_page++; bv->bv_offset = 0; } else { bv->bv_page = bvec->bv_page + (bvec->bv_offset >> PAGE_SHIFT); bv->bv_offset = bvec->bv_offset & ~PAGE_MASK; } bv->bv_len = min_t(unsigned int, PAGE_SIZE - bv->bv_offset, bvec->bv_len - iter_all->done); iter_all->done += bv->bv_len; if (iter_all->done == bvec->bv_len) { iter_all->idx++; iter_all->done = 0; } } #endif /* __LINUX_BVEC_ITER_H */
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 /* SPDX-License-Identifier: GPL-2.0 */ #ifndef _MM_PERCPU_INTERNAL_H #define _MM_PERCPU_INTERNAL_H #include <linux/types.h> #include <linux/percpu.h> /* * There are two chunk types: root and memcg-aware. * Chunks of each type have separate slots list. * * Memcg-aware chunks have an attached vector of obj_cgroup pointers, which is * used to store memcg membership data of a percpu object. Obj_cgroups are * ref-counted pointers to a memory cgroup with an ability to switch dynamically * to the parent memory cgroup. This allows to reclaim a deleted memory cgroup * without reclaiming of all outstanding objects, which hold a reference at it. */ enum pcpu_chunk_type { PCPU_CHUNK_ROOT, #ifdef CONFIG_MEMCG_KMEM PCPU_CHUNK_MEMCG, #endif PCPU_NR_CHUNK_TYPES, PCPU_FAIL_ALLOC = PCPU_NR_CHUNK_TYPES }; /* * pcpu_block_md is the metadata block struct. * Each chunk's bitmap is split into a number of full blocks. * All units are in terms of bits. * * The scan hint is the largest known contiguous area before the contig hint. * It is not necessarily the actual largest contig hint though. There is an * invariant that the scan_hint_start > contig_hint_start iff * scan_hint == contig_hint. This is necessary because when scanning forward, * we don't know if a new contig hint would be better than the current one. */ struct pcpu_block_md { int scan_hint; /* scan hint for block */ int scan_hint_start; /* block relative starting position of the scan hint */ int contig_hint; /* contig hint for block */ int contig_hint_start; /* block relative starting position of the contig hint */ int left_free; /* size of free space along the left side of the block */ int right_free; /* size of free space along the right side of the block */ int first_free; /* block position of first free */ int nr_bits; /* total bits responsible for */ }; struct pcpu_chunk { #ifdef CONFIG_PERCPU_STATS int nr_alloc; /* # of allocations */ size_t max_alloc_size; /* largest allocation size */ #endif struct list_head list; /* linked to pcpu_slot lists */ int free_bytes; /* free bytes in the chunk */ struct pcpu_block_md chunk_md; void *base_addr; /* base address of this chunk */ unsigned long *alloc_map; /* allocation map */ unsigned long *bound_map; /* boundary map */ struct pcpu_block_md *md_blocks; /* metadata blocks */ void *data; /* chunk data */ bool immutable; /* no [de]population allowed */ int start_offset; /* the overlap with the previous region to have a page aligned base_addr */ int end_offset; /* additional area required to have the region end page aligned */ #ifdef CONFIG_MEMCG_KMEM struct obj_cgroup **obj_cgroups; /* vector of object cgroups */ #endif int nr_pages; /* # of pages served by this chunk */ int nr_populated; /* # of populated pages */ int nr_empty_pop_pages; /* # of empty populated pages */ unsigned long populated[]; /* populated bitmap */ }; extern spinlock_t pcpu_lock; extern struct list_head *pcpu_chunk_lists; extern int pcpu_nr_slots; extern int pcpu_nr_empty_pop_pages[]; extern struct pcpu_chunk *pcpu_first_chunk; extern struct pcpu_chunk *pcpu_reserved_chunk; /** * pcpu_chunk_nr_blocks - converts nr_pages to # of md_blocks * @chunk: chunk of interest * * This conversion is from the number of physical pages that the chunk * serves to the number of bitmap blocks used. */ static inline int pcpu_chunk_nr_blocks(struct pcpu_chunk *chunk) { return chunk->nr_pages * PAGE_SIZE / PCPU_BITMAP_BLOCK_SIZE; } /** * pcpu_nr_pages_to_map_bits - converts the pages to size of bitmap * @pages: number of physical pages * * This conversion is from physical pages to the number of bits * required in the bitmap. */ static inline int pcpu_nr_pages_to_map_bits(int pages) { return pages * PAGE_SIZE / PCPU_MIN_ALLOC_SIZE; } /** * pcpu_chunk_map_bits - helper to convert nr_pages to size of bitmap * @chunk: chunk of interest * * This conversion is from the number of physical pages that the chunk * serves to the number of bits in the bitmap. */ static inline int pcpu_chunk_map_bits(struct pcpu_chunk *chunk) { return pcpu_nr_pages_to_map_bits(chunk->nr_pages); } #ifdef CONFIG_MEMCG_KMEM static inline enum pcpu_chunk_type pcpu_chunk_type(struct pcpu_chunk *chunk) { if (chunk->obj_cgroups) return PCPU_CHUNK_MEMCG; return PCPU_CHUNK_ROOT; } static inline bool pcpu_is_memcg_chunk(enum pcpu_chunk_type chunk_type) { return chunk_type == PCPU_CHUNK_MEMCG; } #else static inline enum pcpu_chunk_type pcpu_chunk_type(struct pcpu_chunk *chunk) { return PCPU_CHUNK_ROOT; } static inline bool pcpu_is_memcg_chunk(enum pcpu_chunk_type chunk_type) { return false; } #endif static inline struct list_head *pcpu_chunk_list(enum pcpu_chunk_type chunk_type) { return &pcpu_chunk_lists[pcpu_nr_slots * pcpu_is_memcg_chunk(chunk_type)]; } #ifdef CONFIG_PERCPU_STATS #include <linux/spinlock.h> struct percpu_stats { u64 nr_alloc; /* lifetime # of allocations */ u64 nr_dealloc; /* lifetime # of deallocations */ u64 nr_cur_alloc; /* current # of allocations */ u64 nr_max_alloc; /* max # of live allocations */ u32 nr_chunks; /* current # of live chunks */ u32 nr_max_chunks; /* max # of live chunks */ size_t min_alloc_size; /* min allocaiton size */ size_t max_alloc_size; /* max allocation size */ }; extern struct percpu_stats pcpu_stats; extern struct pcpu_alloc_info pcpu_stats_ai; /* * For debug purposes. We don't care about the flexible array. */ static inline void pcpu_stats_save_ai(const struct pcpu_alloc_info *ai) { memcpy(&pcpu_stats_ai, ai, sizeof(struct pcpu_alloc_info)); /* initialize min_alloc_size to unit_size */ pcpu_stats.min_alloc_size = pcpu_stats_ai.unit_size; } /* * pcpu_stats_area_alloc - increment area allocation stats * @chunk: the location of the area being allocated * @size: size of area to allocate in bytes * * CONTEXT: * pcpu_lock. */ static inline void pcpu_stats_area_alloc(struct pcpu_chunk *chunk, size_t size) { lockdep_assert_held(&pcpu_lock); pcpu_stats.nr_alloc++; pcpu_stats.nr_cur_alloc++; pcpu_stats.nr_max_alloc = max(pcpu_stats.nr_max_alloc, pcpu_stats.nr_cur_alloc); pcpu_stats.min_alloc_size = min(pcpu_stats.min_alloc_size, size); pcpu_stats.max_alloc_size = max(pcpu_stats.max_alloc_size, size); chunk->nr_alloc++; chunk->max_alloc_size = max(chunk->max_alloc_size, size); } /* * pcpu_stats_area_dealloc - decrement allocation stats * @chunk: the location of the area being deallocated * * CONTEXT: * pcpu_lock. */ static inline void pcpu_stats_area_dealloc(struct pcpu_chunk *chunk) { lockdep_assert_held(&pcpu_lock); pcpu_stats.nr_dealloc++; pcpu_stats.nr_cur_alloc--; chunk->nr_alloc--; } /* * pcpu_stats_chunk_alloc - increment chunk stats */ static inline void pcpu_stats_chunk_alloc(void) { unsigned long flags; spin_lock_irqsave(&pcpu_lock, flags); pcpu_stats.nr_chunks++; pcpu_stats.nr_max_chunks = max(pcpu_stats.nr_max_chunks, pcpu_stats.nr_chunks); spin_unlock_irqrestore(&pcpu_lock, flags); } /* * pcpu_stats_chunk_dealloc - decrement chunk stats */ static inline void pcpu_stats_chunk_dealloc(void) { unsigned long flags; spin_lock_irqsave(&pcpu_lock, flags); pcpu_stats.nr_chunks--; spin_unlock_irqrestore(&pcpu_lock, flags); } #else static inline void pcpu_stats_save_ai(const struct pcpu_alloc_info *ai) { } static inline void pcpu_stats_area_alloc(struct pcpu_chunk *chunk, size_t size) { } static inline void pcpu_stats_area_dealloc(struct pcpu_chunk *chunk) { } static inline void pcpu_stats_chunk_alloc(void) { } static inline void pcpu_stats_chunk_dealloc(void) { } #endif /* !CONFIG_PERCPU_STATS */ #endif
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 /* SPDX-License-Identifier: GPL-2.0 */ #ifndef _LINUX_SOCKET_H #define _LINUX_SOCKET_H #include <asm/socket.h> /* arch-dependent defines */ #include <linux/sockios.h> /* the SIOCxxx I/O controls */ #include <linux/uio.h> /* iovec support */ #include <linux/types.h> /* pid_t */ #include <linux/compiler.h> /* __user */ #include <uapi/linux/socket.h> struct file; struct pid; struct cred; struct socket; #define __sockaddr_check_size(size) \ BUILD_BUG_ON(((size) > sizeof(struct __kernel_sockaddr_storage))) #ifdef CONFIG_PROC_FS struct seq_file; extern void socket_seq_show(struct seq_file *seq); #endif typedef __kernel_sa_family_t sa_family_t; /* * 1003.1g requires sa_family_t and that sa_data is char. */ struct sockaddr { sa_family_t sa_family; /* address family, AF_xxx */ char sa_data[14]; /* 14 bytes of protocol address */ }; struct linger { int l_onoff; /* Linger active */ int l_linger; /* How long to linger for */ }; #define sockaddr_storage __kernel_sockaddr_storage /* * As we do 4.4BSD message passing we use a 4.4BSD message passing * system, not 4.3. Thus msg_accrights(len) are now missing. They * belong in an obscure libc emulation or the bin. */ struct msghdr { void *msg_name; /* ptr to socket address structure */ int msg_namelen; /* size of socket address structure */ struct iov_iter msg_iter; /* data */ /* * Ancillary data. msg_control_user is the user buffer used for the * recv* side when msg_control_is_user is set, msg_control is the kernel * buffer used for all other cases. */ union { void *msg_control; void __user *msg_control_user; }; bool msg_control_is_user : 1; __kernel_size_t msg_controllen; /* ancillary data buffer length */ unsigned int msg_flags; /* flags on received message */ struct kiocb *msg_iocb; /* ptr to iocb for async requests */ }; struct user_msghdr { void __user *msg_name; /* ptr to socket address structure */ int msg_namelen; /* size of socket address structure */ struct iovec __user *msg_iov; /* scatter/gather array */ __kernel_size_t msg_iovlen; /* # elements in msg_iov */ void __user *msg_control; /* ancillary data */ __kernel_size_t msg_controllen; /* ancillary data buffer length */ unsigned int msg_flags; /* flags on received message */ }; /* For recvmmsg/sendmmsg */ struct mmsghdr { struct user_msghdr msg_hdr; unsigned int msg_len; }; /* * POSIX 1003.1g - ancillary data object information * Ancillary data consits of a sequence of pairs of * (cmsghdr, cmsg_data[]) */ struct cmsghdr { __kernel_size_t cmsg_len; /* data byte count, including hdr */ int cmsg_level; /* originating protocol */ int cmsg_type; /* protocol-specific type */ }; /* * Ancillary data object information MACROS * Table 5-14 of POSIX 1003.1g */ #define __CMSG_NXTHDR(ctl, len, cmsg) __cmsg_nxthdr((ctl),(len),(cmsg)) #define CMSG_NXTHDR(mhdr, cmsg) cmsg_nxthdr((mhdr), (cmsg)) #define CMSG_ALIGN(len) ( ((len)+sizeof(long)-1) & ~(sizeof(long)-1) ) #define CMSG_DATA(cmsg) \ ((void *)(cmsg) + sizeof(struct cmsghdr)) #define CMSG_USER_DATA(cmsg) \ ((void __user *)(cmsg) + sizeof(struct cmsghdr)) #define CMSG_SPACE(len) (sizeof(struct cmsghdr) + CMSG_ALIGN(len)) #define CMSG_LEN(len) (sizeof(struct cmsghdr) + (len)) #define __CMSG_FIRSTHDR(ctl,len) ((len) >= sizeof(struct cmsghdr) ? \ (struct cmsghdr *)(ctl) : \ (struct cmsghdr *)NULL) #define CMSG_FIRSTHDR(msg) __CMSG_FIRSTHDR((msg)->msg_control, (msg)->msg_controllen) #define CMSG_OK(mhdr, cmsg) ((cmsg)->cmsg_len >= sizeof(struct cmsghdr) && \ (cmsg)->cmsg_len <= (unsigned long) \ ((mhdr)->msg_controllen - \ ((char *)(cmsg) - (char *)(mhdr)->msg_control))) #define for_each_cmsghdr(cmsg, msg) \ for (cmsg = CMSG_FIRSTHDR(msg); \ cmsg; \ cmsg = CMSG_NXTHDR(msg, cmsg)) /* * Get the next cmsg header * * PLEASE, do not touch this function. If you think, that it is * incorrect, grep kernel sources and think about consequences * before trying to improve it. * * Now it always returns valid, not truncated ancillary object * HEADER. But caller still MUST check, that cmsg->cmsg_len is * inside range, given by msg->msg_controllen before using * ancillary object DATA. --ANK (980731) */ static inline struct cmsghdr * __cmsg_nxthdr(void *__ctl, __kernel_size_t __size, struct cmsghdr *__cmsg) { struct cmsghdr * __ptr; __ptr = (struct cmsghdr*)(((unsigned char *) __cmsg) + CMSG_ALIGN(__cmsg->cmsg_len)); if ((unsigned long)((char*)(__ptr+1) - (char *) __ctl) > __size) return (struct cmsghdr *)0; return __ptr; } static inline struct cmsghdr * cmsg_nxthdr (struct msghdr *__msg, struct cmsghdr *__cmsg) { return __cmsg_nxthdr(__msg->msg_control, __msg->msg_controllen, __cmsg); } static inline size_t msg_data_left(struct msghdr *msg) { return iov_iter_count(&msg->msg_iter); } /* "Socket"-level control message types: */ #define SCM_RIGHTS 0x01 /* rw: access rights (array of int) */ #define SCM_CREDENTIALS 0x02 /* rw: struct ucred */ #define SCM_SECURITY 0x03 /* rw: security label */ struct ucred { __u32 pid; __u32 uid; __u32 gid; }; /* Supported address families. */ #define AF_UNSPEC 0 #define AF_UNIX 1 /* Unix domain sockets */ #define AF_LOCAL 1 /* POSIX name for AF_UNIX */ #define AF_INET 2 /* Internet IP Protocol */ #define AF_AX25 3 /* Amateur Radio AX.25 */ #define AF_IPX 4 /* Novell IPX */ #define AF_APPLETALK 5 /* AppleTalk DDP */ #define AF_NETROM 6 /* Amateur Radio NET/ROM */ #define AF_BRIDGE 7 /* Multiprotocol bridge */ #define AF_ATMPVC 8 /* ATM PVCs */ #define AF_X25 9 /* Reserved for X.25 project */ #define AF_INET6 10 /* IP version 6 */ #define AF_ROSE 11 /* Amateur Radio X.25 PLP */ #define AF_DECnet 12 /* Reserved for DECnet project */ #define AF_NETBEUI 13 /* Reserved for 802.2LLC project*/ #define AF_SECURITY 14 /* Security callback pseudo AF */ #define AF_KEY 15 /* PF_KEY key management API */ #define AF_NETLINK 16 #define AF_ROUTE AF_NETLINK /* Alias to emulate 4.4BSD */ #define AF_PACKET 17 /* Packet family */ #define AF_ASH 18 /* Ash */ #define AF_ECONET 19 /* Acorn Econet */ #define AF_ATMSVC 20 /* ATM SVCs */ #define AF_RDS 21 /* RDS sockets */ #define AF_SNA 22 /* Linux SNA Project (nutters!) */ #define AF_IRDA 23 /* IRDA sockets */ #define AF_PPPOX 24 /* PPPoX sockets */ #define AF_WANPIPE 25 /* Wanpipe API Sockets */ #define AF_LLC 26 /* Linux LLC */ #define AF_IB 27 /* Native InfiniBand address */ #define AF_MPLS 28 /* MPLS */ #define AF_CAN 29 /* Controller Area Network */ #define AF_TIPC 30 /* TIPC sockets */ #define AF_BLUETOOTH 31 /* Bluetooth sockets */ #define AF_IUCV 32 /* IUCV sockets */ #define AF_RXRPC 33 /* RxRPC sockets */ #define AF_ISDN 34 /* mISDN sockets */ #define AF_PHONET 35 /* Phonet sockets */ #define AF_IEEE802154 36 /* IEEE802154 sockets */ #define AF_CAIF 37 /* CAIF sockets */ #define AF_ALG 38 /* Algorithm sockets */ #define AF_NFC 39 /* NFC sockets */ #define AF_VSOCK 40 /* vSockets */ #define AF_KCM 41 /* Kernel Connection Multiplexor*/ #define AF_QIPCRTR 42 /* Qualcomm IPC Router */ #define AF_SMC 43 /* smc sockets: reserve number for * PF_SMC protocol family that * reuses AF_INET address family */ #define AF_XDP 44 /* XDP sockets */ #define AF_MAX 45 /* For now.. */ /* Protocol families, same as address families. */ #define PF_UNSPEC AF_UNSPEC #define PF_UNIX AF_UNIX #define PF_LOCAL AF_LOCAL #define PF_INET AF_INET #define PF_AX25 AF_AX25 #define PF_IPX AF_IPX #define PF_APPLETALK AF_APPLETALK #define PF_NETROM AF_NETROM #define PF_BRIDGE AF_BRIDGE #define PF_ATMPVC AF_ATMPVC #define PF_X25 AF_X25 #define PF_INET6 AF_INET6 #define PF_ROSE AF_ROSE #define PF_DECnet AF_DECnet #define PF_NETBEUI AF_NETBEUI #define PF_SECURITY AF_SECURITY #define PF_KEY AF_KEY #define PF_NETLINK AF_NETLINK #define PF_ROUTE AF_ROUTE #define PF_PACKET AF_PACKET #define PF_ASH AF_ASH #define PF_ECONET AF_ECONET #define PF_ATMSVC AF_ATMSVC #define PF_RDS AF_RDS #define PF_SNA AF_SNA #define PF_IRDA AF_IRDA #define PF_PPPOX AF_PPPOX #define PF_WANPIPE AF_WANPIPE #define PF_LLC AF_LLC #define PF_IB AF_IB #define PF_MPLS AF_MPLS #define PF_CAN AF_CAN #define PF_TIPC AF_TIPC #define PF_BLUETOOTH AF_BLUETOOTH #define PF_IUCV AF_IUCV #define PF_RXRPC AF_RXRPC #define PF_ISDN AF_ISDN #define PF_PHONET AF_PHONET #define PF_IEEE802154 AF_IEEE802154 #define PF_CAIF AF_CAIF #define PF_ALG AF_ALG #define PF_NFC AF_NFC #define PF_VSOCK AF_VSOCK #define PF_KCM AF_KCM #define PF_QIPCRTR AF_QIPCRTR #define PF_SMC AF_SMC #define PF_XDP AF_XDP #define PF_MAX AF_MAX /* Maximum queue length specifiable by listen. */ #define SOMAXCONN 4096 /* Flags we can use with send/ and recv. Added those for 1003.1g not all are supported yet */ #define MSG_OOB 1 #define MSG_PEEK 2 #define MSG_DONTROUTE 4 #define MSG_TRYHARD 4 /* Synonym for MSG_DONTROUTE for DECnet */ #define MSG_CTRUNC 8 #define MSG_PROBE 0x10 /* Do not send. Only probe path f.e. for MTU */ #define MSG_TRUNC 0x20 #define MSG_DONTWAIT 0x40 /* Nonblocking io */ #define MSG_EOR 0x80 /* End of record */ #define MSG_WAITALL 0x100 /* Wait for a full request */ #define MSG_FIN 0x200 #define MSG_SYN 0x400 #define MSG_CONFIRM 0x800 /* Confirm path validity */ #define MSG_RST 0x1000 #define MSG_ERRQUEUE 0x2000 /* Fetch message from error queue */ #define MSG_NOSIGNAL 0x4000 /* Do not generate SIGPIPE */ #define MSG_MORE 0x8000 /* Sender will send more */ #define MSG_WAITFORONE 0x10000 /* recvmmsg(): block until 1+ packets avail */ #define MSG_SENDPAGE_NOPOLICY 0x10000 /* sendpage() internal : do no apply policy */ #define MSG_SENDPAGE_NOTLAST 0x20000 /* sendpage() internal : not the last page */ #define MSG_BATCH 0x40000 /* sendmmsg(): more messages coming */ #define MSG_EOF MSG_FIN #define MSG_NO_SHARED_FRAGS 0x80000 /* sendpage() internal : page frags are not shared */ #define MSG_SENDPAGE_DECRYPTED 0x100000 /* sendpage() internal : page may carry * plain text and require encryption */ #define MSG_ZEROCOPY 0x4000000 /* Use user data in kernel path */ #define MSG_FASTOPEN 0x20000000 /* Send data in TCP SYN */ #define MSG_CMSG_CLOEXEC 0x40000000 /* Set close_on_exec for file descriptor received through SCM_RIGHTS */ #if defined(CONFIG_COMPAT) #define MSG_CMSG_COMPAT 0x80000000 /* This message needs 32 bit fixups */ #else #define MSG_CMSG_COMPAT 0 /* We never have 32 bit fixups */ #endif /* Setsockoptions(2) level. Thanks to BSD these must match IPPROTO_xxx */ #define SOL_IP 0 /* #define SOL_ICMP 1 No-no-no! Due to Linux :-) we cannot use SOL_ICMP=1 */ #define SOL_TCP 6 #define SOL_UDP 17 #define SOL_IPV6 41 #define SOL_ICMPV6 58 #define SOL_SCTP 132 #define SOL_UDPLITE 136 /* UDP-Lite (RFC 3828) */ #define SOL_RAW 255 #define SOL_IPX 256 #define SOL_AX25 257 #define SOL_ATALK 258 #define SOL_NETROM 259 #define SOL_ROSE 260 #define SOL_DECNET 261 #define SOL_X25 262 #define SOL_PACKET 263 #define SOL_ATM 264 /* ATM layer (cell level) */ #define SOL_AAL 265 /* ATM Adaption Layer (packet level) */ #define SOL_IRDA 266 #define SOL_NETBEUI 267 #define SOL_LLC 268 #define SOL_DCCP 269 #define SOL_NETLINK 270 #define SOL_TIPC 271 #define SOL_RXRPC 272 #define SOL_PPPOL2TP 273 #define SOL_BLUETOOTH 274 #define SOL_PNPIPE 275 #define SOL_RDS 276 #define SOL_IUCV 277 #define SOL_CAIF 278 #define SOL_ALG 279 #define SOL_NFC 280 #define SOL_KCM 281 #define SOL_TLS 282 #define SOL_XDP 283 /* IPX options */ #define IPX_TYPE 1 extern int move_addr_to_kernel(void __user *uaddr, int ulen, struct sockaddr_storage *kaddr); extern int put_cmsg(struct msghdr*, int level, int type, int len, void *data); struct timespec64; struct __kernel_timespec; struct old_timespec32; struct scm_timestamping_internal { struct timespec64 ts[3]; }; extern void put_cmsg_scm_timestamping64(struct msghdr *msg, struct scm_timestamping_internal *tss); extern void put_cmsg_scm_timestamping(struct msghdr *msg, struct scm_timestamping_internal *tss); /* The __sys_...msg variants allow MSG_CMSG_COMPAT iff * forbid_cmsg_compat==false */ extern long __sys_recvmsg(int fd, struct user_msghdr __user *msg, unsigned int flags, bool forbid_cmsg_compat); extern long __sys_sendmsg(int fd, struct user_msghdr __user *msg, unsigned int flags, bool forbid_cmsg_compat); extern int __sys_recvmmsg(int fd, struct mmsghdr __user *mmsg, unsigned int vlen, unsigned int flags, struct __kernel_timespec __user *timeout, struct old_timespec32 __user *timeout32); extern int __sys_sendmmsg(int fd, struct mmsghdr __user *mmsg, unsigned int vlen, unsigned int flags, bool forbid_cmsg_compat); extern long __sys_sendmsg_sock(struct socket *sock, struct msghdr *msg, unsigned int flags); extern long __sys_recvmsg_sock(struct socket *sock, struct msghdr *msg, struct user_msghdr __user *umsg, struct sockaddr __user *uaddr, unsigned int flags); extern int sendmsg_copy_msghdr(struct msghdr *msg, struct user_msghdr __user *umsg, unsigned flags, struct iovec **iov); extern int recvmsg_copy_msghdr(struct msghdr *msg, struct user_msghdr __user *umsg, unsigned flags, struct sockaddr __user **uaddr, struct iovec **iov); extern int __copy_msghdr_from_user(struct msghdr *kmsg, struct user_msghdr __user *umsg, struct sockaddr __user **save_addr, struct iovec __user **uiov, size_t *nsegs); /* helpers which do the actual work for syscalls */ extern int __sys_recvfrom(int fd, void __user *ubuf, size_t size, unsigned int flags, struct sockaddr __user *addr, int __user *addr_len); extern int __sys_sendto(int fd, void __user *buff, size_t len, unsigned int flags, struct sockaddr __user *addr, int addr_len); extern int __sys_accept4_file(struct file *file, unsigned file_flags, struct sockaddr __user *upeer_sockaddr, int __user *upeer_addrlen, int flags, unsigned long nofile); extern int __sys_accept4(int fd, struct sockaddr __user *upeer_sockaddr, int __user *upeer_addrlen, int flags); extern int __sys_socket(int family, int type, int protocol); extern int __sys_bind(int fd, struct sockaddr __user *umyaddr, int addrlen); extern int __sys_connect_file(struct file *file, struct sockaddr_storage *addr, int addrlen, int file_flags); extern int __sys_connect(int fd, struct sockaddr __user *uservaddr, int addrlen); extern int __sys_listen(int fd, int backlog); extern int __sys_getsockname(int fd, struct sockaddr __user *usockaddr, int __user *usockaddr_len); extern int __sys_getpeername(int fd, struct sockaddr __user *usockaddr, int __user *usockaddr_len); extern int __sys_socketpair(int family, int type, int protocol, int __user *usockvec); extern int __sys_shutdown(int fd, int how); #endif /* _LINUX_SOCKET_H */
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 #ifndef _LINUX_UNALIGNED_PACKED_STRUCT_H #define _LINUX_UNALIGNED_PACKED_STRUCT_H #include <linux/kernel.h> struct __una_u16 { u16 x; } __packed; struct __una_u32 { u32 x; } __packed; struct __una_u64 { u64 x; } __packed; static inline u16 __get_unaligned_cpu16(const void *p) { const struct __una_u16 *ptr = (const struct __una_u16 *)p; return ptr->x; } static inline u32 __get_unaligned_cpu32(const void *p) { const struct __una_u32 *ptr = (const struct __una_u32 *)p; return ptr->x; } static inline u64 __get_unaligned_cpu64(const void *p) { const struct __una_u64 *ptr = (const struct __una_u64 *)p; return ptr->x; } static inline void __put_unaligned_cpu16(u16 val, void *p) { struct __una_u16 *ptr = (struct __una_u16 *)p; ptr->x = val; } static inline void __put_unaligned_cpu32(u32 val, void *p) { struct __una_u32 *ptr = (struct __una_u32 *)p; ptr->x = val; } static inline void __put_unaligned_cpu64(u64 val, void *p) { struct __una_u64 *ptr = (struct __una_u64 *)p; ptr->x = val; } #endif /* _LINUX_UNALIGNED_PACKED_STRUCT_H */
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 /* SPDX-License-Identifier: GPL-2.0-only */ /* * This file is part of the Linux kernel. * * Copyright (c) 2011-2014, Intel Corporation * Authors: Fenghua Yu <fenghua.yu@intel.com>, * H. Peter Anvin <hpa@linux.intel.com> */ #ifndef ASM_X86_ARCHRANDOM_H #define ASM_X86_ARCHRANDOM_H #include <asm/processor.h> #include <asm/cpufeature.h> #define RDRAND_RETRY_LOOPS 10 /* Unconditional execution of RDRAND and RDSEED */ static inline bool __must_check rdrand_long(unsigned long *v) { bool ok; unsigned int retry = RDRAND_RETRY_LOOPS; do { asm volatile("rdrand %[out]" CC_SET(c) : CC_OUT(c) (ok), [out] "=r" (*v)); if (ok) return true; } while (--retry); return false; } static inline bool __must_check rdrand_int(unsigned int *v) { bool ok; unsigned int retry = RDRAND_RETRY_LOOPS; do { asm volatile("rdrand %[out]" CC_SET(c) : CC_OUT(c) (ok), [out] "=r" (*v)); if (ok) return true; } while (--retry); return false; } static inline bool __must_check rdseed_long(unsigned long *v) { bool ok; asm volatile("rdseed %[out]" CC_SET(c) : CC_OUT(c) (ok), [out] "=r" (*v)); return ok; } static inline bool __must_check rdseed_int(unsigned int *v) { bool ok; asm volatile("rdseed %[out]" CC_SET(c) : CC_OUT(c) (ok), [out] "=r" (*v)); return ok; } /* * These are the generic interfaces; they must not be declared if the * stubs in <linux/random.h> are to be invoked, * i.e. CONFIG_ARCH_RANDOM is not defined. */ #ifdef CONFIG_ARCH_RANDOM static inline bool __must_check arch_get_random_long(unsigned long *v) { return static_cpu_has(X86_FEATURE_RDRAND) ? rdrand_long(v) : false; } static inline bool __must_check arch_get_random_int(unsigned int *v) { return static_cpu_has(X86_FEATURE_RDRAND) ? rdrand_int(v) : false; } static inline bool __must_check arch_get_random_seed_long(unsigned long *v) { return static_cpu_has(X86_FEATURE_RDSEED) ? rdseed_long(v) : false; } static inline bool __must_check arch_get_random_seed_int(unsigned int *v) { return static_cpu_has(X86_FEATURE_RDSEED) ? rdseed_int(v) : false; } extern void x86_init_rdrand(struct cpuinfo_x86 *c); #else /* !CONFIG_ARCH_RANDOM */ static inline void x86_init_rdrand(struct cpuinfo_x86 *c) { } #endif /* !CONFIG_ARCH_RANDOM */ #endif /* ASM_X86_ARCHRANDOM_H */
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 /* SPDX-License-Identifier: GPL-2.0 */ #undef TRACE_SYSTEM #define TRACE_SYSTEM msr #undef TRACE_INCLUDE_FILE #define TRACE_INCLUDE_FILE msr-trace #undef TRACE_INCLUDE_PATH #define TRACE_INCLUDE_PATH asm/ #if !defined(_TRACE_MSR_H) || defined(TRACE_HEADER_MULTI_READ) #define _TRACE_MSR_H #include <linux/tracepoint.h> /* * Tracing for x86 model specific registers. Directly maps to the * RDMSR/WRMSR instructions. */ DECLARE_EVENT_CLASS(msr_trace_class, TP_PROTO(unsigned msr, u64 val, int failed), TP_ARGS(msr, val, failed), TP_STRUCT__entry( __field( unsigned, msr ) __field( u64, val ) __field( int, failed ) ), TP_fast_assign( __entry->msr = msr; __entry->val = val; __entry->failed = failed; ), TP_printk("%x, value %llx%s", __entry->msr, __entry->val, __entry->failed ? " #GP" : "") ); DEFINE_EVENT(msr_trace_class, read_msr, TP_PROTO(unsigned msr, u64 val, int failed), TP_ARGS(msr, val, failed) ); DEFINE_EVENT(msr_trace_class, write_msr, TP_PROTO(unsigned msr, u64 val, int failed), TP_ARGS(msr, val, failed) ); DEFINE_EVENT(msr_trace_class, rdpmc, TP_PROTO(unsigned msr, u64 val, int failed), TP_ARGS(msr, val, failed) ); #endif /* _TRACE_MSR_H */ /* This part must be outside protection */ #include <trace/define_trace.h>
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 /* SPDX-License-Identifier: GPL-2.0 */ #ifndef _LINUX_MMAN_H #define _LINUX_MMAN_H #include <linux/mm.h> #include <linux/percpu_counter.h> #include <linux/atomic.h> #include <uapi/linux/mman.h> /* * Arrange for legacy / undefined architecture specific flags to be * ignored by mmap handling code. */ #ifndef MAP_32BIT #define MAP_32BIT 0 #endif #ifndef MAP_HUGE_2MB #define MAP_HUGE_2MB 0 #endif #ifndef MAP_HUGE_1GB #define MAP_HUGE_1GB 0 #endif #ifndef MAP_UNINITIALIZED #define MAP_UNINITIALIZED 0 #endif #ifndef MAP_SYNC #define MAP_SYNC 0 #endif /* * The historical set of flags that all mmap implementations implicitly * support when a ->mmap_validate() op is not provided in file_operations. */ #define LEGACY_MAP_MASK (MAP_SHARED \ | MAP_PRIVATE \ | MAP_FIXED \ | MAP_ANONYMOUS \ | MAP_DENYWRITE \ | MAP_EXECUTABLE \ | MAP_UNINITIALIZED \ | MAP_GROWSDOWN \ | MAP_LOCKED \ | MAP_NORESERVE \ | MAP_POPULATE \ | MAP_NONBLOCK \ | MAP_STACK \ | MAP_HUGETLB \ | MAP_32BIT \ | MAP_HUGE_2MB \ | MAP_HUGE_1GB) extern int sysctl_overcommit_memory; extern int sysctl_overcommit_ratio; extern unsigned long sysctl_overcommit_kbytes; extern struct percpu_counter vm_committed_as; #ifdef CONFIG_SMP extern s32 vm_committed_as_batch; extern void mm_compute_batch(int overcommit_policy); #else #define vm_committed_as_batch 0 static inline void mm_compute_batch(int overcommit_policy) { } #endif unsigned long vm_memory_committed(void); static inline void vm_acct_memory(long pages) { percpu_counter_add_batch(&vm_committed_as, pages, vm_committed_as_batch); } static inline void vm_unacct_memory(long pages) { vm_acct_memory(-pages); } /* * Allow architectures to handle additional protection and flag bits. The * overriding macros must be defined in the arch-specific asm/mman.h file. */ #ifndef arch_calc_vm_prot_bits #define arch_calc_vm_prot_bits(prot, pkey) 0 #endif #ifndef arch_calc_vm_flag_bits #define arch_calc_vm_flag_bits(flags) 0 #endif #ifndef arch_vm_get_page_prot #define arch_vm_get_page_prot(vm_flags) __pgprot(0) #endif #ifndef arch_validate_prot /* * This is called from mprotect(). PROT_GROWSDOWN and PROT_GROWSUP have * already been masked out. * * Returns true if the prot flags are valid */ static inline bool arch_validate_prot(unsigned long prot, unsigned long addr) { return (prot & ~(PROT_READ | PROT_WRITE | PROT_EXEC | PROT_SEM)) == 0; } #define arch_validate_prot arch_validate_prot #endif #ifndef arch_validate_flags /* * This is called from mmap() and mprotect() with the updated vma->vm_flags. * * Returns true if the VM_* flags are valid. */ static inline bool arch_validate_flags(unsigned long flags) { return true; } #define arch_validate_flags arch_validate_flags #endif /* * Optimisation macro. It is equivalent to: * (x & bit1) ? bit2 : 0 * but this version is faster. * ("bit1" and "bit2" must be single bits) */ #define _calc_vm_trans(x, bit1, bit2) \ ((!(bit1) || !(bit2)) ? 0 : \ ((bit1) <= (bit2) ? ((x) & (bit1)) * ((bit2) / (bit1)) \ : ((x) & (bit1)) / ((bit1) / (bit2)))) /* * Combine the mmap "prot" argument into "vm_flags" used internally. */ static inline unsigned long calc_vm_prot_bits(unsigned long prot, unsigned long pkey) { return _calc_vm_trans(prot, PROT_READ, VM_READ ) | _calc_vm_trans(prot, PROT_WRITE, VM_WRITE) | _calc_vm_trans(prot, PROT_EXEC, VM_EXEC) | arch_calc_vm_prot_bits(prot, pkey); } /* * Combine the mmap "flags" argument into "vm_flags" used internally. */ static inline unsigned long calc_vm_flag_bits(unsigned long flags) { return _calc_vm_trans(flags, MAP_GROWSDOWN, VM_GROWSDOWN ) | _calc_vm_trans(flags, MAP_DENYWRITE, VM_DENYWRITE ) | _calc_vm_trans(flags, MAP_LOCKED, VM_LOCKED ) | _calc_vm_trans(flags, MAP_SYNC, VM_SYNC ) | arch_calc_vm_flag_bits(flags); } unsigned long vm_commit_limit(void); #endif /* _LINUX_MMAN_H */
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 /* SPDX-License-Identifier: GPL-2.0-or-later */ #ifndef _LINUX_KPROBES_H #define _LINUX_KPROBES_H /* * Kernel Probes (KProbes) * include/linux/kprobes.h * * Copyright (C) IBM Corporation, 2002, 2004 * * 2002-Oct Created by Vamsi Krishna S <vamsi_krishna@in.ibm.com> Kernel * Probes initial implementation ( includes suggestions from * Rusty Russell). * 2004-July Suparna Bhattacharya <suparna@in.ibm.com> added jumper probes * interface to access function arguments. * 2005-May Hien Nguyen <hien@us.ibm.com> and Jim Keniston * <jkenisto@us.ibm.com> and Prasanna S Panchamukhi * <prasanna@in.ibm.com> added function-return probes. */ #include <linux/compiler.h> #include <linux/linkage.h> #include <linux/list.h> #include <linux/notifier.h> #include <linux/smp.h> #include <linux/bug.h> #include <linux/percpu.h> #include <linux/spinlock.h> #include <linux/rcupdate.h> #include <linux/mutex.h> #include <linux/ftrace.h> #include <asm/kprobes.h> #ifdef CONFIG_KPROBES /* kprobe_status settings */ #define KPROBE_HIT_ACTIVE 0x00000001 #define KPROBE_HIT_SS 0x00000002 #define KPROBE_REENTER 0x00000004 #define KPROBE_HIT_SSDONE 0x00000008 #else /* CONFIG_KPROBES */ #include <asm-generic/kprobes.h> typedef int kprobe_opcode_t; struct arch_specific_insn { int dummy; }; #endif /* CONFIG_KPROBES */ struct kprobe; struct pt_regs; struct kretprobe; struct kretprobe_instance; typedef int (*kprobe_pre_handler_t) (struct kprobe *, struct pt_regs *); typedef void (*kprobe_post_handler_t) (struct kprobe *, struct pt_regs *, unsigned long flags); typedef int (*kprobe_fault_handler_t) (struct kprobe *, struct pt_regs *, int trapnr); typedef int (*kretprobe_handler_t) (struct kretprobe_instance *, struct pt_regs *); struct kprobe { struct hlist_node hlist; /* list of kprobes for multi-handler support */ struct list_head list; /*count the number of times this probe was temporarily disarmed */ unsigned long nmissed; /* location of the probe point */ kprobe_opcode_t *addr; /* Allow user to indicate symbol name of the probe point */ const char *symbol_name; /* Offset into the symbol */ unsigned int offset; /* Called before addr is executed. */ kprobe_pre_handler_t pre_handler; /* Called after addr is executed, unless... */ kprobe_post_handler_t post_handler; /* * ... called if executing addr causes a fault (eg. page fault). * Return 1 if it handled fault, otherwise kernel will see it. */ kprobe_fault_handler_t fault_handler; /* Saved opcode (which has been replaced with breakpoint) */ kprobe_opcode_t opcode; /* copy of the original instruction */ struct arch_specific_insn ainsn; /* * Indicates various status flags. * Protected by kprobe_mutex after this kprobe is registered. */ u32 flags; }; /* Kprobe status flags */ #define KPROBE_FLAG_GONE 1 /* breakpoint has already gone */ #define KPROBE_FLAG_DISABLED 2 /* probe is temporarily disabled */ #define KPROBE_FLAG_OPTIMIZED 4 /* * probe is really optimized. * NOTE: * this flag is only for optimized_kprobe. */ #define KPROBE_FLAG_FTRACE 8 /* probe is using ftrace */ /* Has this kprobe gone ? */ static inline int kprobe_gone(struct kprobe *p) { return p->flags & KPROBE_FLAG_GONE; } /* Is this kprobe disabled ? */ static inline int kprobe_disabled(struct kprobe *p) { return p->flags & (KPROBE_FLAG_DISABLED | KPROBE_FLAG_GONE); } /* Is this kprobe really running optimized path ? */ static inline int kprobe_optimized(struct kprobe *p) { return p->flags & KPROBE_FLAG_OPTIMIZED; } /* Is this kprobe uses ftrace ? */ static inline int kprobe_ftrace(struct kprobe *p) { return p->flags & KPROBE_FLAG_FTRACE; } /* * Function-return probe - * Note: * User needs to provide a handler function, and initialize maxactive. * maxactive - The maximum number of instances of the probed function that * can be active concurrently. * nmissed - tracks the number of times the probed function's return was * ignored, due to maxactive being too low. * */ struct kretprobe { struct kprobe kp; kretprobe_handler_t handler; kretprobe_handler_t entry_handler; int maxactive; int nmissed; size_t data_size; struct hlist_head free_instances; raw_spinlock_t lock; }; struct kretprobe_instance { union { struct hlist_node hlist; struct rcu_head rcu; }; struct kretprobe *rp; kprobe_opcode_t *ret_addr; struct task_struct *task; void *fp; char data[]; }; struct kretprobe_blackpoint { const char *name; void *addr; }; struct kprobe_blacklist_entry { struct list_head list; unsigned long start_addr; unsigned long end_addr; }; #ifdef CONFIG_KPROBES DECLARE_PER_CPU(struct kprobe *, current_kprobe); DECLARE_PER_CPU(struct kprobe_ctlblk, kprobe_ctlblk); /* * For #ifdef avoidance: */ static inline int kprobes_built_in(void) { return 1; } extern void kprobe_busy_begin(void); extern void kprobe_busy_end(void); #ifdef CONFIG_KRETPROBES extern void arch_prepare_kretprobe(struct kretprobe_instance *ri, struct pt_regs *regs); extern int arch_trampoline_kprobe(struct kprobe *p); /* If the trampoline handler called from a kprobe, use this version */ unsigned long __kretprobe_trampoline_handler(struct pt_regs *regs, void *trampoline_address, void *frame_pointer); static nokprobe_inline unsigned long kretprobe_trampoline_handler(struct pt_regs *regs, void *trampoline_address, void *frame_pointer) { unsigned long ret; /* * Set a dummy kprobe for avoiding kretprobe recursion. * Since kretprobe never runs in kprobe handler, no kprobe must * be running at this point. */ kprobe_busy_begin(); ret = __kretprobe_trampoline_handler(regs, trampoline_address, frame_pointer); kprobe_busy_end(); return ret; } #else /* CONFIG_KRETPROBES */ static inline void arch_prepare_kretprobe(struct kretprobe *rp, struct pt_regs *regs) { } static inline int arch_trampoline_kprobe(struct kprobe *p) { return 0; } #endif /* CONFIG_KRETPROBES */ extern struct kretprobe_blackpoint kretprobe_blacklist[]; #ifdef CONFIG_KPROBES_SANITY_TEST extern int init_test_probes(void); #else static inline int init_test_probes(void) { return 0; } #endif /* CONFIG_KPROBES_SANITY_TEST */ extern int arch_prepare_kprobe(struct kprobe *p); extern void arch_arm_kprobe(struct kprobe *p); extern void arch_disarm_kprobe(struct kprobe *p); extern int arch_init_kprobes(void); extern void kprobes_inc_nmissed_count(struct kprobe *p); extern bool arch_within_kprobe_blacklist(unsigned long addr); extern int arch_populate_kprobe_blacklist(void); extern bool arch_kprobe_on_func_entry(unsigned long offset); extern int kprobe_on_func_entry(kprobe_opcode_t *addr, const char *sym, unsigned long offset); extern bool within_kprobe_blacklist(unsigned long addr); extern int kprobe_add_ksym_blacklist(unsigned long entry); extern int kprobe_add_area_blacklist(unsigned long start, unsigned long end); struct kprobe_insn_cache { struct mutex mutex; void *(*alloc)(void); /* allocate insn page */ void (*free)(void *); /* free insn page */ const char *sym; /* symbol for insn pages */ struct list_head pages; /* list of kprobe_insn_page */ size_t insn_size; /* size of instruction slot */ int nr_garbage; }; #ifdef __ARCH_WANT_KPROBES_INSN_SLOT extern kprobe_opcode_t *__get_insn_slot(struct kprobe_insn_cache *c); extern void __free_insn_slot(struct kprobe_insn_cache *c, kprobe_opcode_t *slot, int dirty); /* sleep-less address checking routine */ extern bool __is_insn_slot_addr(struct kprobe_insn_cache *c, unsigned long addr); #define DEFINE_INSN_CACHE_OPS(__name) \ extern struct kprobe_insn_cache kprobe_##__name##_slots; \ \ static inline kprobe_opcode_t *get_##__name##_slot(void) \ { \ return __get_insn_slot(&kprobe_##__name##_slots); \ } \ \ static inline void free_##__name##_slot(kprobe_opcode_t *slot, int dirty)\ { \ __free_insn_slot(&kprobe_##__name##_slots, slot, dirty); \ } \ \ static inline bool is_kprobe_##__name##_slot(unsigned long addr) \ { \ return __is_insn_slot_addr(&kprobe_##__name##_slots, addr); \ } #define KPROBE_INSN_PAGE_SYM "kprobe_insn_page" #define KPROBE_OPTINSN_PAGE_SYM "kprobe_optinsn_page" int kprobe_cache_get_kallsym(struct kprobe_insn_cache *c, unsigned int *symnum, unsigned long *value, char *type, char *sym); #else /* __ARCH_WANT_KPROBES_INSN_SLOT */ #define DEFINE_INSN_CACHE_OPS(__name) \ static inline bool is_kprobe_##__name##_slot(unsigned long addr) \ { \ return 0; \ } #endif DEFINE_INSN_CACHE_OPS(insn); #ifdef CONFIG_OPTPROBES /* * Internal structure for direct jump optimized probe */ struct optimized_kprobe { struct kprobe kp; struct list_head list; /* list for optimizing queue */ struct arch_optimized_insn optinsn; }; /* Architecture dependent functions for direct jump optimization */ extern int arch_prepared_optinsn(struct arch_optimized_insn *optinsn); extern int arch_check_optimized_kprobe(struct optimized_kprobe *op); extern int arch_prepare_optimized_kprobe(struct optimized_kprobe *op, struct kprobe *orig); extern void arch_remove_optimized_kprobe(struct optimized_kprobe *op); extern void arch_optimize_kprobes(struct list_head *oplist); extern void arch_unoptimize_kprobes(struct list_head *oplist, struct list_head *done_list); extern void arch_unoptimize_kprobe(struct optimized_kprobe *op); extern int arch_within_optimized_kprobe(struct optimized_kprobe *op, unsigned long addr); extern void opt_pre_handler(struct kprobe *p, struct pt_regs *regs); DEFINE_INSN_CACHE_OPS(optinsn); #ifdef CONFIG_SYSCTL extern int sysctl_kprobes_optimization; extern int proc_kprobes_optimization_handler(struct ctl_table *table, int write, void *buffer, size_t *length, loff_t *ppos); #endif extern void wait_for_kprobe_optimizer(void); #else static inline void wait_for_kprobe_optimizer(void) { } #endif /* CONFIG_OPTPROBES */ #ifdef CONFIG_KPROBES_ON_FTRACE extern void kprobe_ftrace_handler(unsigned long ip, unsigned long parent_ip, struct ftrace_ops *ops, struct pt_regs *regs); extern int arch_prepare_kprobe_ftrace(struct kprobe *p); #endif int arch_check_ftrace_location(struct kprobe *p); /* Get the kprobe at this addr (if any) - called with preemption disabled */ struct kprobe *get_kprobe(void *addr); /* kprobe_running() will just return the current_kprobe on this CPU */ static inline struct kprobe *kprobe_running(void) { return (__this_cpu_read(current_kprobe)); } static inline void reset_current_kprobe(void) { __this_cpu_write(current_kprobe, NULL); } static inline struct kprobe_ctlblk *get_kprobe_ctlblk(void) { return this_cpu_ptr(&kprobe_ctlblk); } kprobe_opcode_t *kprobe_lookup_name(const char *name, unsigned int offset); int register_kprobe(struct kprobe *p); void unregister_kprobe(struct kprobe *p); int register_kprobes(struct kprobe **kps, int num); void unregister_kprobes(struct kprobe **kps, int num); unsigned long arch_deref_entry_point(void *); int register_kretprobe(struct kretprobe *rp); void unregister_kretprobe(struct kretprobe *rp); int register_kretprobes(struct kretprobe **rps, int num); void unregister_kretprobes(struct kretprobe **rps, int num); void kprobe_flush_task(struct task_struct *tk); void kprobe_free_init_mem(void); int disable_kprobe(struct kprobe *kp); int enable_kprobe(struct kprobe *kp); void dump_kprobe(struct kprobe *kp); void *alloc_insn_page(void); void free_insn_page(void *page); int kprobe_get_kallsym(unsigned int symnum, unsigned long *value, char *type, char *sym); int arch_kprobe_get_kallsym(unsigned int *symnum, unsigned long *value, char *type, char *sym); #else /* !CONFIG_KPROBES: */ static inline int kprobes_built_in(void) { return 0; } static inline int kprobe_fault_handler(struct pt_regs *regs, int trapnr) { return 0; } static inline struct kprobe *get_kprobe(void *addr) { return NULL; } static inline struct kprobe *kprobe_running(void) { return NULL; } static inline int register_kprobe(struct kprobe *p) { return -ENOSYS; } static inline int register_kprobes(struct kprobe **kps, int num) { return -ENOSYS; } static inline void unregister_kprobe(struct kprobe *p) { } static inline void unregister_kprobes(struct kprobe **kps, int num) { } static inline int register_kretprobe(struct kretprobe *rp) { return -ENOSYS; } static inline int register_kretprobes(struct kretprobe **rps, int num) { return -ENOSYS; } static inline void unregister_kretprobe(struct kretprobe *rp) { } static inline void unregister_kretprobes(struct kretprobe **rps, int num) { } static inline void kprobe_flush_task(struct task_struct *tk) { } static inline void kprobe_free_init_mem(void) { } static inline int disable_kprobe(struct kprobe *kp) { return -ENOSYS; } static inline int enable_kprobe(struct kprobe *kp) { return -ENOSYS; } static inline bool within_kprobe_blacklist(unsigned long addr) { return true; } static inline int kprobe_get_kallsym(unsigned int symnum, unsigned long *value, char *type, char *sym) { return -ERANGE; } #endif /* CONFIG_KPROBES */ static inline int disable_kretprobe(struct kretprobe *rp) { return disable_kprobe(&rp->kp); } static inline int enable_kretprobe(struct kretprobe *rp) { return enable_kprobe(&rp->kp); } #ifndef CONFIG_KPROBES static inline bool is_kprobe_insn_slot(unsigned long addr) { return false; } #endif #ifndef CONFIG_OPTPROBES static inline bool is_kprobe_optinsn_slot(unsigned long addr) { return false; } #endif /* Returns true if kprobes handled the fault */ static nokprobe_inline bool kprobe_page_fault(struct pt_regs *regs, unsigned int trap) { if (!kprobes_built_in()) return false; if (user_mode(regs)) return false; /* * To be potentially processing a kprobe fault and to be allowed * to call kprobe_running(), we have to be non-preemptible. */ if (preemptible()) return false; if (!kprobe_running()) return false; return kprobe_fault_handler(regs, trap); } #endif /* _LINUX_KPROBES_H */
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 /* SPDX-License-Identifier: GPL-2.0 */ #ifndef _LINUX_VIRTIO_NET_H #define _LINUX_VIRTIO_NET_H #include <linux/if_vlan.h> #include <uapi/linux/tcp.h> #include <uapi/linux/udp.h> #include <uapi/linux/virtio_net.h> static inline int virtio_net_hdr_set_proto(struct sk_buff *skb, const struct virtio_net_hdr *hdr) { switch (hdr->gso_type & ~VIRTIO_NET_HDR_GSO_ECN) { case VIRTIO_NET_HDR_GSO_TCPV4: case VIRTIO_NET_HDR_GSO_UDP: skb->protocol = cpu_to_be16(ETH_P_IP); break; case VIRTIO_NET_HDR_GSO_TCPV6: skb->protocol = cpu_to_be16(ETH_P_IPV6); break; default: return -EINVAL; } return 0; } static inline int virtio_net_hdr_to_skb(struct sk_buff *skb, const struct virtio_net_hdr *hdr, bool little_endian) { unsigned int gso_type = 0; unsigned int thlen = 0; unsigned int p_off = 0; unsigned int ip_proto; if (hdr->gso_type != VIRTIO_NET_HDR_GSO_NONE) { switch (hdr->gso_type & ~VIRTIO_NET_HDR_GSO_ECN) { case VIRTIO_NET_HDR_GSO_TCPV4: gso_type = SKB_GSO_TCPV4; ip_proto = IPPROTO_TCP; thlen = sizeof(struct tcphdr); break; case VIRTIO_NET_HDR_GSO_TCPV6: gso_type = SKB_GSO_TCPV6; ip_proto = IPPROTO_TCP; thlen = sizeof(struct tcphdr); break; case VIRTIO_NET_HDR_GSO_UDP: gso_type = SKB_GSO_UDP; ip_proto = IPPROTO_UDP; thlen = sizeof(struct udphdr); break; default: return -EINVAL; } if (hdr->gso_type & VIRTIO_NET_HDR_GSO_ECN) gso_type |= SKB_GSO_TCP_ECN; if (hdr->gso_size == 0) return -EINVAL; } skb_reset_mac_header(skb); if (hdr->flags & VIRTIO_NET_HDR_F_NEEDS_CSUM) { u32 start = __virtio16_to_cpu(little_endian, hdr->csum_start); u32 off = __virtio16_to_cpu(little_endian, hdr->csum_offset); u32 needed = start + max_t(u32, thlen, off + sizeof(__sum16)); if (!pskb_may_pull(skb, needed)) return -EINVAL; if (!skb_partial_csum_set(skb, start, off)) return -EINVAL; p_off = skb_transport_offset(skb) + thlen; if (!pskb_may_pull(skb, p_off)) return -EINVAL; } else { /* gso packets without NEEDS_CSUM do not set transport_offset. * probe and drop if does not match one of the above types. */ if (gso_type && skb->network_header) { struct flow_keys_basic keys; if (!skb->protocol) { __be16 protocol = dev_parse_header_protocol(skb); virtio_net_hdr_set_proto(skb, hdr); if (protocol && protocol != skb->protocol) return -EINVAL; } retry: if (!skb_flow_dissect_flow_keys_basic(NULL, skb, &keys, NULL, 0, 0, 0, 0)) { /* UFO does not specify ipv4 or 6: try both */ if (gso_type & SKB_GSO_UDP && skb->protocol == htons(ETH_P_IP)) { skb->protocol = htons(ETH_P_IPV6); goto retry; } return -EINVAL; } p_off = keys.control.thoff + thlen; if (!pskb_may_pull(skb, p_off) || keys.basic.ip_proto != ip_proto) return -EINVAL; skb_set_transport_header(skb, keys.control.thoff); } else if (gso_type) { p_off = thlen; if (!pskb_may_pull(skb, p_off)) return -EINVAL; } } if (hdr->gso_type != VIRTIO_NET_HDR_GSO_NONE) { u16 gso_size = __virtio16_to_cpu(little_endian, hdr->gso_size); struct skb_shared_info *shinfo = skb_shinfo(skb); /* Too small packets are not really GSO ones. */ if (skb->len - p_off > gso_size) { shinfo->gso_size = gso_size; shinfo->gso_type = gso_type; /* Header must be checked, and gso_segs computed. */ shinfo->gso_type |= SKB_GSO_DODGY; shinfo->gso_segs = 0; } } return 0; } static inline int virtio_net_hdr_from_skb(const struct sk_buff *skb, struct virtio_net_hdr *hdr, bool little_endian, bool has_data_valid, int vlan_hlen) { memset(hdr, 0, sizeof(*hdr)); /* no info leak */ if (skb_is_gso(skb)) { struct skb_shared_info *sinfo = skb_shinfo(skb); /* This is a hint as to how much should be linear. */ hdr->hdr_len = __cpu_to_virtio16(little_endian, skb_headlen(skb)); hdr->gso_size = __cpu_to_virtio16(little_endian, sinfo->gso_size); if (sinfo->gso_type & SKB_GSO_TCPV4) hdr->gso_type = VIRTIO_NET_HDR_GSO_TCPV4; else if (sinfo->gso_type & SKB_GSO_TCPV6) hdr->gso_type = VIRTIO_NET_HDR_GSO_TCPV6; else return -EINVAL; if (sinfo->gso_type & SKB_GSO_TCP_ECN) hdr->gso_type |= VIRTIO_NET_HDR_GSO_ECN; } else hdr->gso_type = VIRTIO_NET_HDR_GSO_NONE; if (skb->ip_summed == CHECKSUM_PARTIAL) { hdr->flags = VIRTIO_NET_HDR_F_NEEDS_CSUM; hdr->csum_start = __cpu_to_virtio16(little_endian, skb_checksum_start_offset(skb) + vlan_hlen); hdr->csum_offset = __cpu_to_virtio16(little_endian, skb->csum_offset); } else if (has_data_valid && skb->ip_summed == CHECKSUM_UNNECESSARY) { hdr->flags = VIRTIO_NET_HDR_F_DATA_VALID; } /* else everything is zero */ return 0; } #endif /* _LINUX_VIRTIO_NET_H */
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 /* SPDX-License-Identifier: GPL-2.0 */ #ifndef _LINUX_STRING_HELPERS_H_ #define _LINUX_STRING_HELPERS_H_ #include <linux/ctype.h> #include <linux/types.h> struct file; struct task_struct; /* Descriptions of the types of units to * print in */ enum string_size_units { STRING_UNITS_10, /* use powers of 10^3 (standard SI) */ STRING_UNITS_2, /* use binary powers of 2^10 */ }; void string_get_size(u64 size, u64 blk_size, enum string_size_units units, char *buf, int len); #define UNESCAPE_SPACE 0x01 #define UNESCAPE_OCTAL 0x02 #define UNESCAPE_HEX 0x04 #define UNESCAPE_SPECIAL 0x08 #define UNESCAPE_ANY \ (UNESCAPE_SPACE | UNESCAPE_OCTAL | UNESCAPE_HEX | UNESCAPE_SPECIAL) int string_unescape(char *src, char *dst, size_t size, unsigned int flags); static inline int string_unescape_inplace(char *buf, unsigned int flags) { return string_unescape(buf, buf, 0, flags); } static inline int string_unescape_any(char *src, char *dst, size_t size) { return string_unescape(src, dst, size, UNESCAPE_ANY); } static inline int string_unescape_any_inplace(char *buf) { return string_unescape_any(buf, buf, 0); } #define ESCAPE_SPACE 0x01 #define ESCAPE_SPECIAL 0x02 #define ESCAPE_NULL 0x04 #define ESCAPE_OCTAL 0x08 #define ESCAPE_ANY \ (ESCAPE_SPACE | ESCAPE_OCTAL | ESCAPE_SPECIAL | ESCAPE_NULL) #define ESCAPE_NP 0x10 #define ESCAPE_ANY_NP (ESCAPE_ANY | ESCAPE_NP) #define ESCAPE_HEX 0x20 int string_escape_mem(const char *src, size_t isz, char *dst, size_t osz, unsigned int flags, const char *only); int string_escape_mem_ascii(const char *src, size_t isz, char *dst, size_t osz); static inline int string_escape_mem_any_np(const char *src, size_t isz, char *dst, size_t osz, const char *only) { return string_escape_mem(src, isz, dst, osz, ESCAPE_ANY_NP, only); } static inline int string_escape_str(const char *src, char *dst, size_t sz, unsigned int flags, const char *only) { return string_escape_mem(src, strlen(src), dst, sz, flags, only); } static inline int string_escape_str_any_np(const char *src, char *dst, size_t sz, const char *only) { return string_escape_str(src, dst, sz, ESCAPE_ANY_NP, only); } static inline void string_upper(char *dst, const char *src) { do { *dst++ = toupper(*src); } while (*src++); } static inline void string_lower(char *dst, const char *src) { do { *dst++ = tolower(*src); } while (*src++); } char *kstrdup_quotable(const char *src, gfp_t gfp); char *kstrdup_quotable_cmdline(struct task_struct *task, gfp_t gfp); char *kstrdup_quotable_file(struct file *file, gfp_t gfp); void kfree_strarray(char **array, size_t n); #endif
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 /* * DRBG based on NIST SP800-90A * * Copyright Stephan Mueller <smueller@chronox.de>, 2014 * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, and the entire permission notice in its entirety, * including the disclaimer of warranties. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. The name of the author may not be used to endorse or promote * products derived from this software without specific prior * written permission. * * ALTERNATIVELY, this product may be distributed under the terms of * the GNU General Public License, in which case the provisions of the GPL are * required INSTEAD OF the above restrictions. (This clause is * necessary due to a potential bad interaction between the GPL and * the restrictions contained in a BSD-style copyright.) * * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE, ALL OF * WHICH ARE HEREBY DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT * OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE * USE OF THIS SOFTWARE, EVEN IF NOT ADVISED OF THE POSSIBILITY OF SUCH * DAMAGE. */ #ifndef _DRBG_H #define _DRBG_H #include <linux/random.h> #include <linux/scatterlist.h> #include <crypto/hash.h> #include <crypto/skcipher.h> #include <linux/module.h> #include <linux/crypto.h> #include <linux/slab.h> #include <crypto/internal/rng.h> #include <crypto/rng.h> #include <linux/fips.h> #include <linux/mutex.h> #include <linux/list.h> #include <linux/workqueue.h> /* * Concatenation Helper and string operation helper * * SP800-90A requires the concatenation of different data. To avoid copying * buffers around or allocate additional memory, the following data structure * is used to point to the original memory with its size. In addition, it * is used to build a linked list. The linked list defines the concatenation * of individual buffers. The order of memory block referenced in that * linked list determines the order of concatenation. */ struct drbg_string { const unsigned char *buf; size_t len; struct list_head list; }; static inline void drbg_string_fill(struct drbg_string *string, const unsigned char *buf, size_t len) { string->buf = buf; string->len = len; INIT_LIST_HEAD(&string->list); } struct drbg_state; typedef uint32_t drbg_flag_t; struct drbg_core { drbg_flag_t flags; /* flags for the cipher */ __u8 statelen; /* maximum state length */ __u8 blocklen_bytes; /* block size of output in bytes */ char cra_name[CRYPTO_MAX_ALG_NAME]; /* mapping to kernel crypto API */ /* kernel crypto API backend cipher name */ char backend_cra_name[CRYPTO_MAX_ALG_NAME]; }; struct drbg_state_ops { int (*update)(struct drbg_state *drbg, struct list_head *seed, int reseed); int (*generate)(struct drbg_state *drbg, unsigned char *buf, unsigned int buflen, struct list_head *addtl); int (*crypto_init)(struct drbg_state *drbg); int (*crypto_fini)(struct drbg_state *drbg); }; struct drbg_test_data { struct drbg_string *testentropy; /* TEST PARAMETER: test entropy */ }; struct drbg_state { struct mutex drbg_mutex; /* lock around DRBG */ unsigned char *V; /* internal state 10.1.1.1 1a) */ unsigned char *Vbuf; /* hash: static value 10.1.1.1 1b) hmac / ctr: key */ unsigned char *C; unsigned char *Cbuf; /* Number of RNG requests since last reseed -- 10.1.1.1 1c) */ size_t reseed_ctr; size_t reseed_threshold; /* some memory the DRBG can use for its operation */ unsigned char *scratchpad; unsigned char *scratchpadbuf; void *priv_data; /* Cipher handle */ struct crypto_skcipher *ctr_handle; /* CTR mode cipher handle */ struct skcipher_request *ctr_req; /* CTR mode request handle */ __u8 *outscratchpadbuf; /* CTR mode output scratchpad */ __u8 *outscratchpad; /* CTR mode aligned outbuf */ struct crypto_wait ctr_wait; /* CTR mode async wait obj */ struct scatterlist sg_in, sg_out; /* CTR mode SGLs */ bool seeded; /* DRBG fully seeded? */ bool pr; /* Prediction resistance enabled? */ bool fips_primed; /* Continuous test primed? */ unsigned char *prev; /* FIPS 140-2 continuous test value */ struct work_struct seed_work; /* asynchronous seeding support */ struct crypto_rng *jent; const struct drbg_state_ops *d_ops; const struct drbg_core *core; struct drbg_string test_data; struct random_ready_callback random_ready; }; static inline __u8 drbg_statelen(struct drbg_state *drbg) { if (drbg && drbg->core) return drbg->core->statelen; return 0; } static inline __u8 drbg_blocklen(struct drbg_state *drbg) { if (drbg && drbg->core) return drbg->core->blocklen_bytes; return 0; } static inline __u8 drbg_keylen(struct drbg_state *drbg) { if (drbg && drbg->core) return (drbg->core->statelen - drbg->core->blocklen_bytes); return 0; } static inline size_t drbg_max_request_bytes(struct drbg_state *drbg) { /* SP800-90A requires the limit 2**19 bits, but we return bytes */ return (1 << 16); } static inline size_t drbg_max_addtl(struct drbg_state *drbg) { /* SP800-90A requires 2**35 bytes additional info str / pers str */ #if (__BITS_PER_LONG == 32) /* * SP800-90A allows smaller maximum numbers to be returned -- we * return SIZE_MAX - 1 to allow the verification of the enforcement * of this value in drbg_healthcheck_sanity. */ return (SIZE_MAX - 1); #else return (1UL<<35); #endif } static inline size_t drbg_max_requests(struct drbg_state *drbg) { /* SP800-90A requires 2**48 maximum requests before reseeding */ return (1<<20); } /* * This is a wrapper to the kernel crypto API function of * crypto_rng_generate() to allow the caller to provide additional data. * * @drng DRBG handle -- see crypto_rng_get_bytes * @outbuf output buffer -- see crypto_rng_get_bytes * @outlen length of output buffer -- see crypto_rng_get_bytes * @addtl_input additional information string input buffer * @addtllen length of additional information string buffer * * return * see crypto_rng_get_bytes */ static inline int crypto_drbg_get_bytes_addtl(struct crypto_rng *drng, unsigned char *outbuf, unsigned int outlen, struct drbg_string *addtl) { return crypto_rng_generate(drng, addtl->buf, addtl->len, outbuf, outlen); } /* * TEST code * * This is a wrapper to the kernel crypto API function of * crypto_rng_generate() to allow the caller to provide additional data and * allow furnishing of test_data * * @drng DRBG handle -- see crypto_rng_get_bytes * @outbuf output buffer -- see crypto_rng_get_bytes * @outlen length of output buffer -- see crypto_rng_get_bytes * @addtl_input additional information string input buffer * @addtllen length of additional information string buffer * @test_data filled test data * * return * see crypto_rng_get_bytes */ static inline int crypto_drbg_get_bytes_addtl_test(struct crypto_rng *drng, unsigned char *outbuf, unsigned int outlen, struct drbg_string *addtl, struct drbg_test_data *test_data) { crypto_rng_set_entropy(drng, test_data->testentropy->buf, test_data->testentropy->len); return crypto_rng_generate(drng, addtl->buf, addtl->len, outbuf, outlen); } /* * TEST code * * This is a wrapper to the kernel crypto API function of * crypto_rng_reset() to allow the caller to provide test_data * * @drng DRBG handle -- see crypto_rng_reset * @pers personalization string input buffer * @perslen length of additional information string buffer * @test_data filled test data * * return * see crypto_rng_reset */ static inline int crypto_drbg_reset_test(struct crypto_rng *drng, struct drbg_string *pers, struct drbg_test_data *test_data) { crypto_rng_set_entropy(drng, test_data->testentropy->buf, test_data->testentropy->len); return crypto_rng_reset(drng, pers->buf, pers->len); } /* DRBG type flags */ #define DRBG_CTR ((drbg_flag_t)1<<0) #define DRBG_HMAC ((drbg_flag_t)1<<1) #define DRBG_HASH ((drbg_flag_t)1<<2) #define DRBG_TYPE_MASK (DRBG_CTR | DRBG_HMAC | DRBG_HASH) /* DRBG strength flags */ #define DRBG_STRENGTH128 ((drbg_flag_t)1<<3) #define DRBG_STRENGTH192 ((drbg_flag_t)1<<4) #define DRBG_STRENGTH256 ((drbg_flag_t)1<<5) #define DRBG_STRENGTH_MASK (DRBG_STRENGTH128 | DRBG_STRENGTH192 | \ DRBG_STRENGTH256) enum drbg_prefixes { DRBG_PREFIX0 = 0x00, DRBG_PREFIX1, DRBG_PREFIX2, DRBG_PREFIX3 }; #endif /* _DRBG_H */
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 /* SPDX-License-Identifier: GPL-2.0 */ #ifndef __ASM_GENERIC_GETORDER_H #define __ASM_GENERIC_GETORDER_H #ifndef __ASSEMBLY__ #include <linux/compiler.h> #include <linux/log2.h> /** * get_order - Determine the allocation order of a memory size * @size: The size for which to get the order * * Determine the allocation order of a particular sized block of memory. This * is on a logarithmic scale, where: * * 0 -> 2^0 * PAGE_SIZE and below * 1 -> 2^1 * PAGE_SIZE to 2^0 * PAGE_SIZE + 1 * 2 -> 2^2 * PAGE_SIZE to 2^1 * PAGE_SIZE + 1 * 3 -> 2^3 * PAGE_SIZE to 2^2 * PAGE_SIZE + 1 * 4 -> 2^4 * PAGE_SIZE to 2^3 * PAGE_SIZE + 1 * ... * * The order returned is used to find the smallest allocation granule required * to hold an object of the specified size. * * The result is undefined if the size is 0. */ static inline __attribute_const__ int get_order(unsigned long size) { if (__builtin_constant_p(size)) { if (!size) return BITS_PER_LONG - PAGE_SHIFT; if (size < (1UL << PAGE_SHIFT)) return 0; return ilog2((size) - 1) - PAGE_SHIFT + 1; } size--; size >>= PAGE_SHIFT; #if BITS_PER_LONG == 32 return fls(size); #else return fls64(size); #endif } #endif /* __ASSEMBLY__ */ #endif /* __ASM_GENERIC_GETORDER_H */
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 // SPDX-License-Identifier: GPL-2.0+ /* * ext4_jbd2.h * * Written by Stephen C. Tweedie <sct@redhat.com>, 1999 * * Copyright 1998--1999 Red Hat corp --- All Rights Reserved * * Ext4-specific journaling extensions. */ #ifndef _EXT4_JBD2_H #define _EXT4_JBD2_H #include <linux/fs.h> #include <linux/jbd2.h> #include "ext4.h" #define EXT4_JOURNAL(inode) (EXT4_SB((inode)->i_sb)->s_journal) /* Define the number of blocks we need to account to a transaction to * modify one block of data. * * We may have to touch one inode, one bitmap buffer, up to three * indirection blocks, the group and superblock summaries, and the data * block to complete the transaction. * * For extents-enabled fs we may have to allocate and modify up to * 5 levels of tree, data block (for each of these we need bitmap + group * summaries), root which is stored in the inode, sb */ #define EXT4_SINGLEDATA_TRANS_BLOCKS(sb) \ (ext4_has_feature_extents(sb) ? 20U : 8U) /* Extended attribute operations touch at most two data buffers, * two bitmap buffers, and two group summaries, in addition to the inode * and the superblock, which are already accounted for. */ #define EXT4_XATTR_TRANS_BLOCKS 6U /* Define the minimum size for a transaction which modifies data. This * needs to take into account the fact that we may end up modifying two * quota files too (one for the group, one for the user quota). The * superblock only gets updated once, of course, so don't bother * counting that again for the quota updates. */ #define EXT4_DATA_TRANS_BLOCKS(sb) (EXT4_SINGLEDATA_TRANS_BLOCKS(sb) + \ EXT4_XATTR_TRANS_BLOCKS - 2 + \ EXT4_MAXQUOTAS_TRANS_BLOCKS(sb)) /* * Define the number of metadata blocks we need to account to modify data. * * This include super block, inode block, quota blocks and xattr blocks */ #define EXT4_META_TRANS_BLOCKS(sb) (EXT4_XATTR_TRANS_BLOCKS + \ EXT4_MAXQUOTAS_TRANS_BLOCKS(sb)) /* Define an arbitrary limit for the amount of data we will anticipate * writing to any given transaction. For unbounded transactions such as * write(2) and truncate(2) we can write more than this, but we always * start off at the maximum transaction size and grow the transaction * optimistically as we go. */ #define EXT4_MAX_TRANS_DATA 64U /* We break up a large truncate or write transaction once the handle's * buffer credits gets this low, we need either to extend the * transaction or to start a new one. Reserve enough space here for * inode, bitmap, superblock, group and indirection updates for at least * one block, plus two quota updates. Quota allocations are not * needed. */ #define EXT4_RESERVE_TRANS_BLOCKS 12U /* * Number of credits needed if we need to insert an entry into a * directory. For each new index block, we need 4 blocks (old index * block, new index block, bitmap block, bg summary). For normal * htree directories there are 2 levels; if the largedir feature * enabled it's 3 levels. */ #define EXT4_INDEX_EXTRA_TRANS_BLOCKS 12U #ifdef CONFIG_QUOTA /* Amount of blocks needed for quota update - we know that the structure was * allocated so we need to update only data block */ #define EXT4_QUOTA_TRANS_BLOCKS(sb) ((test_opt(sb, QUOTA) ||\ ext4_has_feature_quota(sb)) ? 1 : 0) /* Amount of blocks needed for quota insert/delete - we do some block writes * but inode, sb and group updates are done only once */ #define EXT4_QUOTA_INIT_BLOCKS(sb) ((test_opt(sb, QUOTA) ||\ ext4_has_feature_quota(sb)) ?\ (DQUOT_INIT_ALLOC*(EXT4_SINGLEDATA_TRANS_BLOCKS(sb)-3)\ +3+DQUOT_INIT_REWRITE) : 0) #define EXT4_QUOTA_DEL_BLOCKS(sb) ((test_opt(sb, QUOTA) ||\ ext4_has_feature_quota(sb)) ?\ (DQUOT_DEL_ALLOC*(EXT4_SINGLEDATA_TRANS_BLOCKS(sb)-3)\ +3+DQUOT_DEL_REWRITE) : 0) #else #define EXT4_QUOTA_TRANS_BLOCKS(sb) 0 #define EXT4_QUOTA_INIT_BLOCKS(sb) 0 #define EXT4_QUOTA_DEL_BLOCKS(sb) 0 #endif #define EXT4_MAXQUOTAS_TRANS_BLOCKS(sb) (EXT4_MAXQUOTAS*EXT4_QUOTA_TRANS_BLOCKS(sb)) #define EXT4_MAXQUOTAS_INIT_BLOCKS(sb) (EXT4_MAXQUOTAS*EXT4_QUOTA_INIT_BLOCKS(sb)) #define EXT4_MAXQUOTAS_DEL_BLOCKS(sb) (EXT4_MAXQUOTAS*EXT4_QUOTA_DEL_BLOCKS(sb)) /* * Ext4 handle operation types -- for logging purposes */ #define EXT4_HT_MISC 0 #define EXT4_HT_INODE 1 #define EXT4_HT_WRITE_PAGE 2 #define EXT4_HT_MAP_BLOCKS 3 #define EXT4_HT_DIR 4 #define EXT4_HT_TRUNCATE 5 #define EXT4_HT_QUOTA 6 #define EXT4_HT_RESIZE 7 #define EXT4_HT_MIGRATE 8 #define EXT4_HT_MOVE_EXTENTS 9 #define EXT4_HT_XATTR 10 #define EXT4_HT_EXT_CONVERT 11 #define EXT4_HT_MAX 12 /** * struct ext4_journal_cb_entry - Base structure for callback information. * * This struct is a 'seed' structure for a using with your own callback * structs. If you are using callbacks you must allocate one of these * or another struct of your own definition which has this struct * as it's first element and pass it to ext4_journal_callback_add(). */ struct ext4_journal_cb_entry { /* list information for other callbacks attached to the same handle */ struct list_head jce_list; /* Function to call with this callback structure */ void (*jce_func)(struct super_block *sb, struct ext4_journal_cb_entry *jce, int error); /* user data goes here */ }; /** * ext4_journal_callback_add: add a function to call after transaction commit * @handle: active journal transaction handle to register callback on * @func: callback function to call after the transaction has committed: * @sb: superblock of current filesystem for transaction * @jce: returned journal callback data * @rc: journal state at commit (0 = transaction committed properly) * @jce: journal callback data (internal and function private data struct) * * The registered function will be called in the context of the journal thread * after the transaction for which the handle was created has completed. * * No locks are held when the callback function is called, so it is safe to * call blocking functions from within the callback, but the callback should * not block or run for too long, or the filesystem will be blocked waiting for * the next transaction to commit. No journaling functions can be used, or * there is a risk of deadlock. * * There is no guaranteed calling order of multiple registered callbacks on * the same transaction. */ static inline void _ext4_journal_callback_add(handle_t *handle, struct ext4_journal_cb_entry *jce) { /* Add the jce to transaction's private list */ list_add_tail(&jce->jce_list, &handle->h_transaction->t_private_list); } static inline void ext4_journal_callback_add(handle_t *handle, void (*func)(struct super_block *sb, struct ext4_journal_cb_entry *jce, int rc), struct ext4_journal_cb_entry *jce) { struct ext4_sb_info *sbi = EXT4_SB(handle->h_transaction->t_journal->j_private); /* Add the jce to transaction's private list */ jce->jce_func = func; spin_lock(&sbi->s_md_lock); _ext4_journal_callback_add(handle, jce); spin_unlock(&sbi->s_md_lock); } /** * ext4_journal_callback_del: delete a registered callback * @handle: active journal transaction handle on which callback was registered * @jce: registered journal callback entry to unregister * Return true if object was successfully removed */ static inline bool ext4_journal_callback_try_del(handle_t *handle, struct ext4_journal_cb_entry *jce) { bool deleted; struct ext4_sb_info *sbi = EXT4_SB(handle->h_transaction->t_journal->j_private); spin_lock(&sbi->s_md_lock); deleted = !list_empty(&jce->jce_list); list_del_init(&jce->jce_list); spin_unlock(&sbi->s_md_lock); return deleted; } int ext4_mark_iloc_dirty(handle_t *handle, struct inode *inode, struct ext4_iloc *iloc); /* * On success, We end up with an outstanding reference count against * iloc->bh. This _must_ be cleaned up later. */ int ext4_reserve_inode_write(handle_t *handle, struct inode *inode, struct ext4_iloc *iloc); #define ext4_mark_inode_dirty(__h, __i) \ __ext4_mark_inode_dirty((__h), (__i), __func__, __LINE__) int __ext4_mark_inode_dirty(handle_t *handle, struct inode *inode, const char *func, unsigned int line); int ext4_expand_extra_isize(struct inode *inode, unsigned int new_extra_isize, struct ext4_iloc *iloc); /* * Wrapper functions with which ext4 calls into JBD. */ int __ext4_journal_get_write_access(const char *where, unsigned int line, handle_t *handle, struct buffer_head *bh); int __ext4_forget(const char *where, unsigned int line, handle_t *handle, int is_metadata, struct inode *inode, struct buffer_head *bh, ext4_fsblk_t blocknr); int __ext4_journal_get_create_access(const char *where, unsigned int line, handle_t *handle, struct buffer_head *bh); int __ext4_handle_dirty_metadata(const char *where, unsigned int line, handle_t *handle, struct inode *inode, struct buffer_head *bh); int __ext4_handle_dirty_super(const char *where, unsigned int line, handle_t *handle, struct super_block *sb); #define ext4_journal_get_write_access(handle, bh) \ __ext4_journal_get_write_access(__func__, __LINE__, (handle), (bh)) #define ext4_forget(handle, is_metadata, inode, bh, block_nr) \ __ext4_forget(__func__, __LINE__, (handle), (is_metadata), (inode), \ (bh), (block_nr)) #define ext4_journal_get_create_access(handle, bh) \ __ext4_journal_get_create_access(__func__, __LINE__, (handle), (bh)) #define ext4_handle_dirty_metadata(handle, inode, bh) \ __ext4_handle_dirty_metadata(__func__, __LINE__, (handle), (inode), \ (bh)) #define ext4_handle_dirty_super(handle, sb) \ __ext4_handle_dirty_super(__func__, __LINE__, (handle), (sb)) handle_t *__ext4_journal_start_sb(struct super_block *sb, unsigned int line, int type, int blocks, int rsv_blocks, int revoke_creds); int __ext4_journal_stop(const char *where, unsigned int line, handle_t *handle); #define EXT4_NOJOURNAL_MAX_REF_COUNT ((unsigned long) 4096) /* Note: Do not use this for NULL handles. This is only to determine if * a properly allocated handle is using a journal or not. */ static inline int ext4_handle_valid(handle_t *handle) { if ((unsigned long)handle < EXT4_NOJOURNAL_MAX_REF_COUNT) return 0; return 1; } static inline void ext4_handle_sync(handle_t *handle) { if (ext4_handle_valid(handle)) handle->h_sync = 1; } static inline int ext4_handle_is_aborted(handle_t *handle) { if (ext4_handle_valid(handle)) return is_handle_aborted(handle); return 0; } static inline int ext4_free_metadata_revoke_credits(struct super_block *sb, int blocks) { /* Freeing each metadata block can result in freeing one cluster */ return blocks * EXT4_SB(sb)->s_cluster_ratio; } static inline int ext4_trans_default_revoke_credits(struct super_block *sb) { return ext4_free_metadata_revoke_credits(sb, 8); } #define ext4_journal_start_sb(sb, type, nblocks) \ __ext4_journal_start_sb((sb), __LINE__, (type), (nblocks), 0, \ ext4_trans_default_revoke_credits(sb)) #define ext4_journal_start(inode, type, nblocks) \ __ext4_journal_start((inode), __LINE__, (type), (nblocks), 0, \ ext4_trans_default_revoke_credits((inode)->i_sb)) #define ext4_journal_start_with_reserve(inode, type, blocks, rsv_blocks)\ __ext4_journal_start((inode), __LINE__, (type), (blocks), (rsv_blocks),\ ext4_trans_default_revoke_credits((inode)->i_sb)) #define ext4_journal_start_with_revoke(inode, type, blocks, revoke_creds) \ __ext4_journal_start((inode), __LINE__, (type), (blocks), 0, \ (revoke_creds)) static inline handle_t *__ext4_journal_start(struct inode *inode, unsigned int line, int type, int blocks, int rsv_blocks, int revoke_creds) { return __ext4_journal_start_sb(inode->i_sb, line, type, blocks, rsv_blocks, revoke_creds); } #define ext4_journal_stop(handle) \ __ext4_journal_stop(__func__, __LINE__, (handle)) #define ext4_journal_start_reserved(handle, type) \ __ext4_journal_start_reserved((handle), __LINE__, (type)) handle_t *__ext4_journal_start_reserved(handle_t *handle, unsigned int line, int type); static inline handle_t *ext4_journal_current_handle(void) { return journal_current_handle(); } static inline int ext4_journal_extend(handle_t *handle, int nblocks, int revoke) { if (ext4_handle_valid(handle)) return jbd2_journal_extend(handle, nblocks, revoke); return 0; } static inline int ext4_journal_restart(handle_t *handle, int nblocks, int revoke) { if (ext4_handle_valid(handle)) return jbd2__journal_restart(handle, nblocks, revoke, GFP_NOFS); return 0; } int __ext4_journal_ensure_credits(handle_t *handle, int check_cred, int extend_cred, int revoke_cred); /* * Ensure @handle has at least @check_creds credits available. If not, * transaction will be extended or restarted to contain at least @extend_cred * credits. Before restarting transaction @fn is executed to allow for cleanup * before the transaction is restarted. * * The return value is < 0 in case of error, 0 in case the handle has enough * credits or transaction extension succeeded, 1 in case transaction had to be * restarted. */ #define ext4_journal_ensure_credits_fn(handle, check_cred, extend_cred, \ revoke_cred, fn) \ ({ \ __label__ __ensure_end; \ int err = __ext4_journal_ensure_credits((handle), (check_cred), \ (extend_cred), (revoke_cred)); \ \ if (err <= 0) \ goto __ensure_end; \ err = (fn); \ if (err < 0) \ goto __ensure_end; \ err = ext4_journal_restart((handle), (extend_cred), (revoke_cred)); \ if (err == 0) \ err = 1; \ __ensure_end: \ err; \ }) /* * Ensure given handle has at least requested amount of credits available, * possibly restarting transaction if needed. We also make sure the transaction * has space for at least ext4_trans_default_revoke_credits(sb) revoke records * as freeing one or two blocks is very common pattern and requesting this is * very cheap. */ static inline int ext4_journal_ensure_credits(handle_t *handle, int credits, int revoke_creds) { return ext4_journal_ensure_credits_fn(handle, credits, credits, revoke_creds, 0); } static inline int ext4_journal_blocks_per_page(struct inode *inode) { if (EXT4_JOURNAL(inode) != NULL) return jbd2_journal_blocks_per_page(inode); return 0; } static inline int ext4_journal_force_commit(journal_t *journal) { if (journal) return jbd2_journal_force_commit(journal); return 0; } static inline int ext4_jbd2_inode_add_write(handle_t *handle, struct inode *inode, loff_t start_byte, loff_t length) { if (ext4_handle_valid(handle)) return jbd2_journal_inode_ranged_write(handle, EXT4_I(inode)->jinode, start_byte, length); return 0; } static inline int ext4_jbd2_inode_add_wait(handle_t *handle, struct inode *inode, loff_t start_byte, loff_t length) { if (ext4_handle_valid(handle)) return jbd2_journal_inode_ranged_wait(handle, EXT4_I(inode)->jinode, start_byte, length); return 0; } static inline void ext4_update_inode_fsync_trans(handle_t *handle, struct inode *inode, int datasync) { struct ext4_inode_info *ei = EXT4_I(inode); if (ext4_handle_valid(handle) && !is_handle_aborted(handle)) { ei->i_sync_tid = handle->h_transaction->t_tid; if (datasync) ei->i_datasync_tid = handle->h_transaction->t_tid; } } /* super.c */ int ext4_force_commit(struct super_block *sb); /* * Ext4 inode journal modes */ #define EXT4_INODE_JOURNAL_DATA_MODE 0x01 /* journal data mode */ #define EXT4_INODE_ORDERED_DATA_MODE 0x02 /* ordered data mode */ #define EXT4_INODE_WRITEBACK_DATA_MODE 0x04 /* writeback data mode */ int ext4_inode_journal_mode(struct inode *inode); static inline int ext4_should_journal_data(struct inode *inode) { return ext4_inode_journal_mode(inode) & EXT4_INODE_JOURNAL_DATA_MODE; } static inline int ext4_should_order_data(struct inode *inode) { return ext4_inode_journal_mode(inode) & EXT4_INODE_ORDERED_DATA_MODE; } static inline int ext4_should_writeback_data(struct inode *inode) { return ext4_inode_journal_mode(inode) & EXT4_INODE_WRITEBACK_DATA_MODE; } static inline int ext4_free_data_revoke_credits(struct inode *inode, int blocks) { if (test_opt(inode->i_sb, DATA_FLAGS) == EXT4_MOUNT_JOURNAL_DATA) return 0; if (!ext4_should_journal_data(inode)) return 0; /* * Data blocks in one extent are contiguous, just account for partial * clusters at extent boundaries */ return blocks + 2*(EXT4_SB(inode->i_sb)->s_cluster_ratio - 1); } /* * This function controls whether or not we should try to go down the * dioread_nolock code paths, which makes it safe to avoid taking * i_mutex for direct I/O reads. This only works for extent-based * files, and it doesn't work if data journaling is enabled, since the * dioread_nolock code uses b_private to pass information back to the * I/O completion handler, and this conflicts with the jbd's use of * b_private. */ static inline int ext4_should_dioread_nolock(struct inode *inode) { if (!test_opt(inode->i_sb, DIOREAD_NOLOCK)) return 0; if (!S_ISREG(inode->i_mode)) return 0; if (!(ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS))) return 0; if (ext4_should_journal_data(inode)) return 0; /* temporary fix to prevent generic/422 test failures */ if (!test_opt(inode->i_sb, DELALLOC)) return 0; return 1; } #endif /* _EXT4_JBD2_H */
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 /* SPDX-License-Identifier: GPL-2.0 */ /* * This is <linux/capability.h> * * Andrew G. Morgan <morgan@kernel.org> * Alexander Kjeldaas <astor@guardian.no> * with help from Aleph1, Roland Buresund and Andrew Main. * * See here for the libcap library ("POSIX draft" compliance): * * ftp://www.kernel.org/pub/linux/libs/security/linux-privs/kernel-2.6/ */ #ifndef _LINUX_CAPABILITY_H #define _LINUX_CAPABILITY_H #include <uapi/linux/capability.h> #include <linux/uidgid.h> #define _KERNEL_CAPABILITY_VERSION _LINUX_CAPABILITY_VERSION_3 #define _KERNEL_CAPABILITY_U32S _LINUX_CAPABILITY_U32S_3 extern int file_caps_enabled; typedef struct kernel_cap_struct { __u32 cap[_KERNEL_CAPABILITY_U32S]; } kernel_cap_t; /* same as vfs_ns_cap_data but in cpu endian and always filled completely */ struct cpu_vfs_cap_data { __u32 magic_etc; kernel_cap_t permitted; kernel_cap_t inheritable; kuid_t rootid; }; #define _USER_CAP_HEADER_SIZE (sizeof(struct __user_cap_header_struct)) #define _KERNEL_CAP_T_SIZE (sizeof(kernel_cap_t)) struct file; struct inode; struct dentry; struct task_struct; struct user_namespace; extern const kernel_cap_t __cap_empty_set; extern const kernel_cap_t __cap_init_eff_set; /* * Internal kernel functions only */ #define CAP_FOR_EACH_U32(__capi) \ for (__capi = 0; __capi < _KERNEL_CAPABILITY_U32S; ++__capi) /* * CAP_FS_MASK and CAP_NFSD_MASKS: * * The fs mask is all the privileges that fsuid==0 historically meant. * At one time in the past, that included CAP_MKNOD and CAP_LINUX_IMMUTABLE. * * It has never meant setting security.* and trusted.* xattrs. * * We could also define fsmask as follows: * 1. CAP_FS_MASK is the privilege to bypass all fs-related DAC permissions * 2. The security.* and trusted.* xattrs are fs-related MAC permissions */ # define CAP_FS_MASK_B0 (CAP_TO_MASK(CAP_CHOWN) \ | CAP_TO_MASK(CAP_MKNOD) \ | CAP_TO_MASK(CAP_DAC_OVERRIDE) \ | CAP_TO_MASK(CAP_DAC_READ_SEARCH) \ | CAP_TO_MASK(CAP_FOWNER) \ | CAP_TO_MASK(CAP_FSETID)) # define CAP_FS_MASK_B1 (CAP_TO_MASK(CAP_MAC_OVERRIDE)) #if _KERNEL_CAPABILITY_U32S != 2 # error Fix up hand-coded capability macro initializers #else /* HAND-CODED capability initializers */ #define CAP_LAST_U32 ((_KERNEL_CAPABILITY_U32S) - 1) #define CAP_LAST_U32_VALID_MASK (CAP_TO_MASK(CAP_LAST_CAP + 1) -1) # define CAP_EMPTY_SET ((kernel_cap_t){{ 0, 0 }}) # define CAP_FULL_SET ((kernel_cap_t){{ ~0, CAP_LAST_U32_VALID_MASK }}) # define CAP_FS_SET ((kernel_cap_t){{ CAP_FS_MASK_B0 \ | CAP_TO_MASK(CAP_LINUX_IMMUTABLE), \ CAP_FS_MASK_B1 } }) # define CAP_NFSD_SET ((kernel_cap_t){{ CAP_FS_MASK_B0 \ | CAP_TO_MASK(CAP_SYS_RESOURCE), \ CAP_FS_MASK_B1 } }) #endif /* _KERNEL_CAPABILITY_U32S != 2 */ # define cap_clear(c) do { (c) = __cap_empty_set; } while (0) #define cap_raise(c, flag) ((c).cap[CAP_TO_INDEX(flag)] |= CAP_TO_MASK(flag)) #define cap_lower(c, flag) ((c).cap[CAP_TO_INDEX(flag)] &= ~CAP_TO_MASK(flag)) #define cap_raised(c, flag) ((c).cap[CAP_TO_INDEX(flag)] & CAP_TO_MASK(flag)) #define CAP_BOP_ALL(c, a, b, OP) \ do { \ unsigned __capi; \ CAP_FOR_EACH_U32(__capi) { \ c.cap[__capi] = a.cap[__capi] OP b.cap[__capi]; \ } \ } while (0) #define CAP_UOP_ALL(c, a, OP) \ do { \ unsigned __capi; \ CAP_FOR_EACH_U32(__capi) { \ c.cap[__capi] = OP a.cap[__capi]; \ } \ } while (0) static inline kernel_cap_t cap_combine(const kernel_cap_t a, const kernel_cap_t b) { kernel_cap_t dest; CAP_BOP_ALL(dest, a, b, |); return dest; } static inline kernel_cap_t cap_intersect(const kernel_cap_t a, const kernel_cap_t b) { kernel_cap_t dest; CAP_BOP_ALL(dest, a, b, &); return dest; } static inline kernel_cap_t cap_drop(const kernel_cap_t a, const kernel_cap_t drop) { kernel_cap_t dest; CAP_BOP_ALL(dest, a, drop, &~); return dest; } static inline kernel_cap_t cap_invert(const kernel_cap_t c) { kernel_cap_t dest; CAP_UOP_ALL(dest, c, ~); return dest; } static inline bool cap_isclear(const kernel_cap_t a) { unsigned __capi; CAP_FOR_EACH_U32(__capi) { if (a.cap[__capi] != 0) return false; } return true; } /* * Check if "a" is a subset of "set". * return true if ALL of the capabilities in "a" are also in "set" * cap_issubset(0101, 1111) will return true * return false if ANY of the capabilities in "a" are not in "set" * cap_issubset(1111, 0101) will return false */ static inline bool cap_issubset(const kernel_cap_t a, const kernel_cap_t set) { kernel_cap_t dest; dest = cap_drop(a, set); return cap_isclear(dest); } /* Used to decide between falling back on the old suser() or fsuser(). */ static inline kernel_cap_t cap_drop_fs_set(const kernel_cap_t a) { const kernel_cap_t __cap_fs_set = CAP_FS_SET; return cap_drop(a, __cap_fs_set); } static inline kernel_cap_t cap_raise_fs_set(const kernel_cap_t a, const kernel_cap_t permitted) { const kernel_cap_t __cap_fs_set = CAP_FS_SET; return cap_combine(a, cap_intersect(permitted, __cap_fs_set)); } static inline kernel_cap_t cap_drop_nfsd_set(const kernel_cap_t a) { const kernel_cap_t __cap_fs_set = CAP_NFSD_SET; return cap_drop(a, __cap_fs_set); } static inline kernel_cap_t cap_raise_nfsd_set(const kernel_cap_t a, const kernel_cap_t permitted) { const kernel_cap_t __cap_nfsd_set = CAP_NFSD_SET; return cap_combine(a, cap_intersect(permitted, __cap_nfsd_set)); } #ifdef CONFIG_MULTIUSER extern bool has_capability(struct task_struct *t, int cap); extern bool has_ns_capability(struct task_struct *t, struct user_namespace *ns, int cap); extern bool has_capability_noaudit(struct task_struct *t, int cap); extern bool has_ns_capability_noaudit(struct task_struct *t, struct user_namespace *ns, int cap); extern bool capable(int cap); extern bool ns_capable(struct user_namespace *ns, int cap); extern bool ns_capable_noaudit(struct user_namespace *ns, int cap); extern bool ns_capable_setid(struct user_namespace *ns, int cap); #else static inline bool has_capability(struct task_struct *t, int cap) { return true; } static inline bool has_ns_capability(struct task_struct *t, struct user_namespace *ns, int cap) { return true; } static inline bool has_capability_noaudit(struct task_struct *t, int cap) { return true; } static inline bool has_ns_capability_noaudit(struct task_struct *t, struct user_namespace *ns, int cap) { return true; } static inline bool capable(int cap) { return true; } static inline bool ns_capable(struct user_namespace *ns, int cap) { return true; } static inline bool ns_capable_noaudit(struct user_namespace *ns, int cap) { return true; } static inline bool ns_capable_setid(struct user_namespace *ns, int cap) { return true; } #endif /* CONFIG_MULTIUSER */ extern bool privileged_wrt_inode_uidgid(struct user_namespace *ns, const struct inode *inode); extern bool capable_wrt_inode_uidgid(const struct inode *inode, int cap); extern bool file_ns_capable(const struct file *file, struct user_namespace *ns, int cap); extern bool ptracer_capable(struct task_struct *tsk, struct user_namespace *ns); static inline bool perfmon_capable(void) { return capable(CAP_PERFMON) || capable(CAP_SYS_ADMIN); } static inline bool bpf_capable(void) { return capable(CAP_BPF) || capable(CAP_SYS_ADMIN); } static inline bool checkpoint_restore_ns_capable(struct user_namespace *ns) { return ns_capable(ns, CAP_CHECKPOINT_RESTORE) || ns_capable(ns, CAP_SYS_ADMIN); } /* audit system wants to get cap info from files as well */ extern int get_vfs_caps_from_disk(const struct dentry *dentry, struct cpu_vfs_cap_data *cpu_caps); extern int cap_convert_nscap(struct dentry *dentry, void **ivalue, size_t size); #endif /* !_LINUX_CAPABILITY_H */
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 /* SPDX-License-Identifier: GPL-2.0 */ #ifndef _ASM_X86_CPUFEATURE_H #define _ASM_X86_CPUFEATURE_H #include <asm/processor.h> #if defined(__KERNEL__) && !defined(__ASSEMBLY__) #include <asm/asm.h> #include <linux/bitops.h> enum cpuid_leafs { CPUID_1_EDX = 0, CPUID_8000_0001_EDX, CPUID_8086_0001_EDX, CPUID_LNX_1, CPUID_1_ECX, CPUID_C000_0001_EDX, CPUID_8000_0001_ECX, CPUID_LNX_2, CPUID_LNX_3, CPUID_7_0_EBX, CPUID_D_1_EAX, CPUID_LNX_4, CPUID_7_1_EAX, CPUID_8000_0008_EBX, CPUID_6_EAX, CPUID_8000_000A_EDX, CPUID_7_ECX, CPUID_8000_0007_EBX, CPUID_7_EDX, }; #ifdef CONFIG_X86_FEATURE_NAMES extern const char * const x86_cap_flags[NCAPINTS*32]; extern const char * const x86_power_flags[32]; #define X86_CAP_FMT "%s" #define x86_cap_flag(flag) x86_cap_flags[flag] #else #define X86_CAP_FMT "%d:%d" #define x86_cap_flag(flag) ((flag) >> 5), ((flag) & 31) #endif /* * In order to save room, we index into this array by doing * X86_BUG_<name> - NCAPINTS*32. */ extern const char * const x86_bug_flags[NBUGINTS*32]; #define test_cpu_cap(c, bit) \ test_bit(bit, (unsigned long *)((c)->x86_capability)) /* * There are 32 bits/features in each mask word. The high bits * (selected with (bit>>5) give us the word number and the low 5 * bits give us the bit/feature number inside the word. * (1UL<<((bit)&31) gives us a mask for the feature_bit so we can * see if it is set in the mask word. */ #define CHECK_BIT_IN_MASK_WORD(maskname, word, bit) \ (((bit)>>5)==(word) && (1UL<<((bit)&31) & maskname##word )) /* * {REQUIRED,DISABLED}_MASK_CHECK below may seem duplicated with the * following BUILD_BUG_ON_ZERO() check but when NCAPINTS gets changed, all * header macros which use NCAPINTS need to be changed. The duplicated macro * use causes the compiler to issue errors for all headers so that all usage * sites can be corrected. */ #define REQUIRED_MASK_BIT_SET(feature_bit) \ ( CHECK_BIT_IN_MASK_WORD(REQUIRED_MASK, 0, feature_bit) || \ CHECK_BIT_IN_MASK_WORD(REQUIRED_MASK, 1, feature_bit) || \ CHECK_BIT_IN_MASK_WORD(REQUIRED_MASK, 2, feature_bit) || \ CHECK_BIT_IN_MASK_WORD(REQUIRED_MASK, 3, feature_bit) || \ CHECK_BIT_IN_MASK_WORD(REQUIRED_MASK, 4, feature_bit) || \ CHECK_BIT_IN_MASK_WORD(REQUIRED_MASK, 5, feature_bit) || \ CHECK_BIT_IN_MASK_WORD(REQUIRED_MASK, 6, feature_bit) || \ CHECK_BIT_IN_MASK_WORD(REQUIRED_MASK, 7, feature_bit) || \ CHECK_BIT_IN_MASK_WORD(REQUIRED_MASK, 8, feature_bit) || \ CHECK_BIT_IN_MASK_WORD(REQUIRED_MASK, 9, feature_bit) || \ CHECK_BIT_IN_MASK_WORD(REQUIRED_MASK, 10, feature_bit) || \ CHECK_BIT_IN_MASK_WORD(REQUIRED_MASK, 11, feature_bit) || \ CHECK_BIT_IN_MASK_WORD(REQUIRED_MASK, 12, feature_bit) || \ CHECK_BIT_IN_MASK_WORD(REQUIRED_MASK, 13, feature_bit) || \ CHECK_BIT_IN_MASK_WORD(REQUIRED_MASK, 14, feature_bit) || \ CHECK_BIT_IN_MASK_WORD(REQUIRED_MASK, 15, feature_bit) || \ CHECK_BIT_IN_MASK_WORD(REQUIRED_MASK, 16, feature_bit) || \ CHECK_BIT_IN_MASK_WORD(REQUIRED_MASK, 17, feature_bit) || \ CHECK_BIT_IN_MASK_WORD(REQUIRED_MASK, 18, feature_bit) || \ REQUIRED_MASK_CHECK || \ BUILD_BUG_ON_ZERO(NCAPINTS != 19)) #define DISABLED_MASK_BIT_SET(feature_bit) \ ( CHECK_BIT_IN_MASK_WORD(DISABLED_MASK, 0, feature_bit) || \ CHECK_BIT_IN_MASK_WORD(DISABLED_MASK, 1, feature_bit) || \ CHECK_BIT_IN_MASK_WORD(DISABLED_MASK, 2, feature_bit) || \ CHECK_BIT_IN_MASK_WORD(DISABLED_MASK, 3, feature_bit) || \ CHECK_BIT_IN_MASK_WORD(DISABLED_MASK, 4, feature_bit) || \ CHECK_BIT_IN_MASK_WORD(DISABLED_MASK, 5, feature_bit) || \ CHECK_BIT_IN_MASK_WORD(DISABLED_MASK, 6, feature_bit) || \ CHECK_BIT_IN_MASK_WORD(DISABLED_MASK, 7, feature_bit) || \ CHECK_BIT_IN_MASK_WORD(DISABLED_MASK, 8, feature_bit) || \ CHECK_BIT_IN_MASK_WORD(DISABLED_MASK, 9, feature_bit) || \ CHECK_BIT_IN_MASK_WORD(DISABLED_MASK, 10, feature_bit) || \ CHECK_BIT_IN_MASK_WORD(DISABLED_MASK, 11, feature_bit) || \ CHECK_BIT_IN_MASK_WORD(DISABLED_MASK, 12, feature_bit) || \ CHECK_BIT_IN_MASK_WORD(DISABLED_MASK, 13, feature_bit) || \ CHECK_BIT_IN_MASK_WORD(DISABLED_MASK, 14, feature_bit) || \ CHECK_BIT_IN_MASK_WORD(DISABLED_MASK, 15, feature_bit) || \ CHECK_BIT_IN_MASK_WORD(DISABLED_MASK, 16, feature_bit) || \ CHECK_BIT_IN_MASK_WORD(DISABLED_MASK, 17, feature_bit) || \ CHECK_BIT_IN_MASK_WORD(DISABLED_MASK, 18, feature_bit) || \ DISABLED_MASK_CHECK || \ BUILD_BUG_ON_ZERO(NCAPINTS != 19)) #define cpu_has(c, bit) \ (__builtin_constant_p(bit) && REQUIRED_MASK_BIT_SET(bit) ? 1 : \ test_cpu_cap(c, bit)) #define this_cpu_has(bit) \ (__builtin_constant_p(bit) && REQUIRED_MASK_BIT_SET(bit) ? 1 : \ x86_this_cpu_test_bit(bit, \ (unsigned long __percpu *)&cpu_info.x86_capability)) /* * This macro is for detection of features which need kernel * infrastructure to be used. It may *not* directly test the CPU * itself. Use the cpu_has() family if you want true runtime * testing of CPU features, like in hypervisor code where you are * supporting a possible guest feature where host support for it * is not relevant. */ #define cpu_feature_enabled(bit) \ (__builtin_constant_p(bit) && DISABLED_MASK_BIT_SET(bit) ? 0 : static_cpu_has(bit)) #define boot_cpu_has(bit) cpu_has(&boot_cpu_data, bit) #define set_cpu_cap(c, bit) set_bit(bit, (unsigned long *)((c)->x86_capability)) extern void setup_clear_cpu_cap(unsigned int bit); extern void clear_cpu_cap(struct cpuinfo_x86 *c, unsigned int bit); #define setup_force_cpu_cap(bit) do { \ set_cpu_cap(&boot_cpu_data, bit); \ set_bit(bit, (unsigned long *)cpu_caps_set); \ } while (0) #define setup_force_cpu_bug(bit) setup_force_cpu_cap(bit) #if defined(__clang__) && !defined(CONFIG_CC_HAS_ASM_GOTO) /* * Workaround for the sake of BPF compilation which utilizes kernel * headers, but clang does not support ASM GOTO and fails the build. */ #ifndef __BPF_TRACING__ #warning "Compiler lacks ASM_GOTO support. Add -D __BPF_TRACING__ to your compiler arguments" #endif #define static_cpu_has(bit) boot_cpu_has(bit) #else /* * Static testing of CPU features. Used the same as boot_cpu_has(). It * statically patches the target code for additional performance. Use * static_cpu_has() only in fast paths, where every cycle counts. Which * means that the boot_cpu_has() variant is already fast enough for the * majority of cases and you should stick to using it as it is generally * only two instructions: a RIP-relative MOV and a TEST. */ static __always_inline bool _static_cpu_has(u16 bit) { asm_volatile_goto("1: jmp 6f\n" "2:\n" ".skip -(((5f-4f) - (2b-1b)) > 0) * " "((5f-4f) - (2b-1b)),0x90\n" "3:\n" ".section .altinstructions,\"a\"\n" " .long 1b - .\n" /* src offset */ " .long 4f - .\n" /* repl offset */ " .word %P[always]\n" /* always replace */ " .byte 3b - 1b\n" /* src len */ " .byte 5f - 4f\n" /* repl len */ " .byte 3b - 2b\n" /* pad len */ ".previous\n" ".section .altinstr_replacement,\"ax\"\n" "4: jmp %l[t_no]\n" "5:\n" ".previous\n" ".section .altinstructions,\"a\"\n" " .long 1b - .\n" /* src offset */ " .long 0\n" /* no replacement */ " .word %P[feature]\n" /* feature bit */ " .byte 3b - 1b\n" /* src len */ " .byte 0\n" /* repl len */ " .byte 0\n" /* pad len */ ".previous\n" ".section .altinstr_aux,\"ax\"\n" "6:\n" " testb %[bitnum],%[cap_byte]\n" " jnz %l[t_yes]\n" " jmp %l[t_no]\n" ".previous\n" : : [feature] "i" (bit), [always] "i" (X86_FEATURE_ALWAYS), [bitnum] "i" (1 << (bit & 7)), [cap_byte] "m" (((const char *)boot_cpu_data.x86_capability)[bit >> 3]) : : t_yes, t_no); t_yes: return true; t_no: return false; } #define static_cpu_has(bit) \ ( \ __builtin_constant_p(boot_cpu_has(bit)) ? \ boot_cpu_has(bit) : \ _static_cpu_has(bit) \ ) #endif #define cpu_has_bug(c, bit) cpu_has(c, (bit)) #define set_cpu_bug(c, bit) set_cpu_cap(c, (bit)) #define clear_cpu_bug(c, bit) clear_cpu_cap(c, (bit)) #define static_cpu_has_bug(bit) static_cpu_has((bit)) #define boot_cpu_has_bug(bit) cpu_has_bug(&boot_cpu_data, (bit)) #define boot_cpu_set_bug(bit) set_cpu_cap(&boot_cpu_data, (bit)) #define MAX_CPU_FEATURES (NCAPINTS * 32) #define cpu_have_feature boot_cpu_has #define CPU_FEATURE_TYPEFMT "x86,ven%04Xfam%04Xmod%04X" #define CPU_FEATURE_TYPEVAL boot_cpu_data.x86_vendor, boot_cpu_data.x86, \ boot_cpu_data.x86_model #endif /* defined(__KERNEL__) && !defined(__ASSEMBLY__) */ #endif /* _ASM_X86_CPUFEATURE_H */
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 /* SPDX-License-Identifier: GPL-2.0 */ #ifndef _ASM_X86_PROCESSOR_H #define _ASM_X86_PROCESSOR_H #include <asm/processor-flags.h> /* Forward declaration, a strange C thing */ struct task_struct; struct mm_struct; struct io_bitmap; struct vm86; #include <asm/math_emu.h> #include <asm/segment.h> #include <asm/types.h> #include <uapi/asm/sigcontext.h> #include <asm/current.h> #include <asm/cpufeatures.h> #include <asm/page.h> #include <asm/pgtable_types.h> #include <asm/percpu.h> #include <asm/msr.h> #include <asm/desc_defs.h> #include <asm/nops.h> #include <asm/special_insns.h> #include <asm/fpu/types.h> #include <asm/unwind_hints.h> #include <asm/vmxfeatures.h> #include <asm/vdso/processor.h> #include <linux/personality.h> #include <linux/cache.h> #include <linux/threads.h> #include <linux/math64.h> #include <linux/err.h> #include <linux/irqflags.h> #include <linux/mem_encrypt.h> /* * We handle most unaligned accesses in hardware. On the other hand * unaligned DMA can be quite expensive on some Nehalem processors. * * Based on this we disable the IP header alignment in network drivers. */ #define NET_IP_ALIGN 0 #define HBP_NUM 4 /* * These alignment constraints are for performance in the vSMP case, * but in the task_struct case we must also meet hardware imposed * alignment requirements of the FPU state: */ #ifdef CONFIG_X86_VSMP # define ARCH_MIN_TASKALIGN (1 << INTERNODE_CACHE_SHIFT) # define ARCH_MIN_MMSTRUCT_ALIGN (1 << INTERNODE_CACHE_SHIFT) #else # define ARCH_MIN_TASKALIGN __alignof__(union fpregs_state) # define ARCH_MIN_MMSTRUCT_ALIGN 0 #endif enum tlb_infos { ENTRIES, NR_INFO }; extern u16 __read_mostly tlb_lli_4k[NR_INFO]; extern u16 __read_mostly tlb_lli_2m[NR_INFO]; extern u16 __read_mostly tlb_lli_4m[NR_INFO]; extern u16 __read_mostly tlb_lld_4k[NR_INFO]; extern u16 __read_mostly tlb_lld_2m[NR_INFO]; extern u16 __read_mostly tlb_lld_4m[NR_INFO]; extern u16 __read_mostly tlb_lld_1g[NR_INFO]; /* * CPU type and hardware bug flags. Kept separately for each CPU. * Members of this structure are referenced in head_32.S, so think twice * before touching them. [mj] */ struct cpuinfo_x86 { __u8 x86; /* CPU family */ __u8 x86_vendor; /* CPU vendor */ __u8 x86_model; __u8 x86_stepping; #ifdef CONFIG_X86_64 /* Number of 4K pages in DTLB/ITLB combined(in pages): */ int x86_tlbsize; #endif #ifdef CONFIG_X86_VMX_FEATURE_NAMES __u32 vmx_capability[NVMXINTS]; #endif __u8 x86_virt_bits; __u8 x86_phys_bits; /* CPUID returned core id bits: */ __u8 x86_coreid_bits; __u8 cu_id; /* Max extended CPUID function supported: */ __u32 extended_cpuid_level; /* Maximum supported CPUID level, -1=no CPUID: */ int cpuid_level; /* * Align to size of unsigned long because the x86_capability array * is passed to bitops which require the alignment. Use unnamed * union to enforce the array is aligned to size of unsigned long. */ union { __u32 x86_capability[NCAPINTS + NBUGINTS]; unsigned long x86_capability_alignment; }; char x86_vendor_id[16]; char x86_model_id[64]; /* in KB - valid for CPUS which support this call: */ unsigned int x86_cache_size; int x86_cache_alignment; /* In bytes */ /* Cache QoS architectural values, valid only on the BSP: */ int x86_cache_max_rmid; /* max index */ int x86_cache_occ_scale; /* scale to bytes */ int x86_cache_mbm_width_offset; int x86_power; unsigned long loops_per_jiffy; /* cpuid returned max cores value: */ u16 x86_max_cores; u16 apicid; u16 initial_apicid; u16 x86_clflush_size; /* number of cores as seen by the OS: */ u16 booted_cores; /* Physical processor id: */ u16 phys_proc_id; /* Logical processor id: */ u16 logical_proc_id; /* Core id: */ u16 cpu_core_id; u16 cpu_die_id; u16 logical_die_id; /* Index into per_cpu list: */ u16 cpu_index; u32 microcode; /* Address space bits used by the cache internally */ u8 x86_cache_bits; unsigned initialized : 1; } __randomize_layout; struct cpuid_regs { u32 eax, ebx, ecx, edx; }; enum cpuid_regs_idx { CPUID_EAX = 0, CPUID_EBX, CPUID_ECX, CPUID_EDX, }; #define X86_VENDOR_INTEL 0 #define X86_VENDOR_CYRIX 1 #define X86_VENDOR_AMD 2 #define X86_VENDOR_UMC 3 #define X86_VENDOR_CENTAUR 5 #define X86_VENDOR_TRANSMETA 7 #define X86_VENDOR_NSC 8 #define X86_VENDOR_HYGON 9 #define X86_VENDOR_ZHAOXIN 10 #define X86_VENDOR_NUM 11 #define X86_VENDOR_UNKNOWN 0xff /* * capabilities of CPUs */ extern struct cpuinfo_x86 boot_cpu_data; extern struct cpuinfo_x86 new_cpu_data; extern __u32 cpu_caps_cleared[NCAPINTS + NBUGINTS]; extern __u32 cpu_caps_set[NCAPINTS + NBUGINTS]; #ifdef CONFIG_SMP DECLARE_PER_CPU_READ_MOSTLY(struct cpuinfo_x86, cpu_info); #define cpu_data(cpu) per_cpu(cpu_info, cpu) #else #define cpu_info boot_cpu_data #define cpu_data(cpu) boot_cpu_data #endif extern const struct seq_operations cpuinfo_op; #define cache_line_size() (boot_cpu_data.x86_cache_alignment) extern void cpu_detect(struct cpuinfo_x86 *c); static inline unsigned long long l1tf_pfn_limit(void) { return BIT_ULL(boot_cpu_data.x86_cache_bits - 1 - PAGE_SHIFT); } extern void early_cpu_init(void); extern void identify_boot_cpu(void); extern void identify_secondary_cpu(struct cpuinfo_x86 *); extern void print_cpu_info(struct cpuinfo_x86 *); void print_cpu_msr(struct cpuinfo_x86 *); #ifdef CONFIG_X86_32 extern int have_cpuid_p(void); #else static inline int have_cpuid_p(void) { return 1; } #endif static inline void native_cpuid(unsigned int *eax, unsigned int *ebx, unsigned int *ecx, unsigned int *edx) { /* ecx is often an input as well as an output. */ asm volatile("cpuid" : "=a" (*eax), "=b" (*ebx), "=c" (*ecx), "=d" (*edx) : "0" (*eax), "2" (*ecx) : "memory"); } #define native_cpuid_reg(reg) \ static inline unsigned int native_cpuid_##reg(unsigned int op) \ { \ unsigned int eax = op, ebx, ecx = 0, edx; \ \ native_cpuid(&eax, &ebx, &ecx, &edx); \ \ return reg; \ } /* * Native CPUID functions returning a single datum. */ native_cpuid_reg(eax) native_cpuid_reg(ebx) native_cpuid_reg(ecx) native_cpuid_reg(edx) /* * Friendlier CR3 helpers. */ static inline unsigned long read_cr3_pa(void) { return __read_cr3() & CR3_ADDR_MASK; } static inline unsigned long native_read_cr3_pa(void) { return __native_read_cr3() & CR3_ADDR_MASK; } static inline void load_cr3(pgd_t *pgdir) { write_cr3(__sme_pa(pgdir)); } /* * Note that while the legacy 'TSS' name comes from 'Task State Segment', * on modern x86 CPUs the TSS also holds information important to 64-bit mode, * unrelated to the task-switch mechanism: */ #ifdef CONFIG_X86_32 /* This is the TSS defined by the hardware. */ struct x86_hw_tss { unsigned short back_link, __blh; unsigned long sp0; unsigned short ss0, __ss0h; unsigned long sp1; /* * We don't use ring 1, so ss1 is a convenient scratch space in * the same cacheline as sp0. We use ss1 to cache the value in * MSR_IA32_SYSENTER_CS. When we context switch * MSR_IA32_SYSENTER_CS, we first check if the new value being * written matches ss1, and, if it's not, then we wrmsr the new * value and update ss1. * * The only reason we context switch MSR_IA32_SYSENTER_CS is * that we set it to zero in vm86 tasks to avoid corrupting the * stack if we were to go through the sysenter path from vm86 * mode. */ unsigned short ss1; /* MSR_IA32_SYSENTER_CS */ unsigned short __ss1h; unsigned long sp2; unsigned short ss2, __ss2h; unsigned long __cr3; unsigned long ip; unsigned long flags; unsigned long ax; unsigned long cx; unsigned long dx; unsigned long bx; unsigned long sp; unsigned long bp; unsigned long si; unsigned long di; unsigned short es, __esh; unsigned short cs, __csh; unsigned short ss, __ssh; unsigned short ds, __dsh; unsigned short fs, __fsh; unsigned short gs, __gsh; unsigned short ldt, __ldth; unsigned short trace; unsigned short io_bitmap_base; } __attribute__((packed)); #else struct x86_hw_tss { u32 reserved1; u64 sp0; /* * We store cpu_current_top_of_stack in sp1 so it's always accessible. * Linux does not use ring 1, so sp1 is not otherwise needed. */ u64 sp1; /* * Since Linux does not use ring 2, the 'sp2' slot is unused by * hardware. entry_SYSCALL_64 uses it as scratch space to stash * the user RSP value. */ u64 sp2; u64 reserved2; u64 ist[7]; u32 reserved3; u32 reserved4; u16 reserved5; u16 io_bitmap_base; } __attribute__((packed)); #endif /* * IO-bitmap sizes: */ #define IO_BITMAP_BITS 65536 #define IO_BITMAP_BYTES (IO_BITMAP_BITS / BITS_PER_BYTE) #define IO_BITMAP_LONGS (IO_BITMAP_BYTES / sizeof(long)) #define IO_BITMAP_OFFSET_VALID_MAP \ (offsetof(struct tss_struct, io_bitmap.bitmap) - \ offsetof(struct tss_struct, x86_tss)) #define IO_BITMAP_OFFSET_VALID_ALL \ (offsetof(struct tss_struct, io_bitmap.mapall) - \ offsetof(struct tss_struct, x86_tss)) #ifdef CONFIG_X86_IOPL_IOPERM /* * sizeof(unsigned long) coming from an extra "long" at the end of the * iobitmap. The limit is inclusive, i.e. the last valid byte. */ # define __KERNEL_TSS_LIMIT \ (IO_BITMAP_OFFSET_VALID_ALL + IO_BITMAP_BYTES + \ sizeof(unsigned long) - 1) #else # define __KERNEL_TSS_LIMIT \ (offsetof(struct tss_struct, x86_tss) + sizeof(struct x86_hw_tss) - 1) #endif /* Base offset outside of TSS_LIMIT so unpriviledged IO causes #GP */ #define IO_BITMAP_OFFSET_INVALID (__KERNEL_TSS_LIMIT + 1) struct entry_stack { char stack[PAGE_SIZE]; }; struct entry_stack_page { struct entry_stack stack; } __aligned(PAGE_SIZE); /* * All IO bitmap related data stored in the TSS: */ struct x86_io_bitmap { /* The sequence number of the last active bitmap. */ u64 prev_sequence; /* * Store the dirty size of the last io bitmap offender. The next * one will have to do the cleanup as the switch out to a non io * bitmap user will just set x86_tss.io_bitmap_base to a value * outside of the TSS limit. So for sane tasks there is no need to * actually touch the io_bitmap at all. */ unsigned int prev_max; /* * The extra 1 is there because the CPU will access an * additional byte beyond the end of the IO permission * bitmap. The extra byte must be all 1 bits, and must * be within the limit. */ unsigned long bitmap[IO_BITMAP_LONGS + 1]; /* * Special I/O bitmap to emulate IOPL(3). All bytes zero, * except the additional byte at the end. */ unsigned long mapall[IO_BITMAP_LONGS + 1]; }; struct tss_struct { /* * The fixed hardware portion. This must not cross a page boundary * at risk of violating the SDM's advice and potentially triggering * errata. */ struct x86_hw_tss x86_tss; struct x86_io_bitmap io_bitmap; } __aligned(PAGE_SIZE); DECLARE_PER_CPU_PAGE_ALIGNED(struct tss_struct, cpu_tss_rw); /* Per CPU interrupt stacks */ struct irq_stack { char stack[IRQ_STACK_SIZE]; } __aligned(IRQ_STACK_SIZE); DECLARE_PER_CPU(struct irq_stack *, hardirq_stack_ptr); #ifdef CONFIG_X86_32 DECLARE_PER_CPU(unsigned long, cpu_current_top_of_stack); #else /* The RO copy can't be accessed with this_cpu_xyz(), so use the RW copy. */ #define cpu_current_top_of_stack cpu_tss_rw.x86_tss.sp1 #endif #ifdef CONFIG_X86_64 struct fixed_percpu_data { /* * GCC hardcodes the stack canary as %gs:40. Since the * irq_stack is the object at %gs:0, we reserve the bottom * 48 bytes of the irq stack for the canary. */ char gs_base[40]; unsigned long stack_canary; }; DECLARE_PER_CPU_FIRST(struct fixed_percpu_data, fixed_percpu_data) __visible; DECLARE_INIT_PER_CPU(fixed_percpu_data); static inline unsigned long cpu_kernelmode_gs_base(int cpu) { return (unsigned long)per_cpu(fixed_percpu_data.gs_base, cpu); } DECLARE_PER_CPU(unsigned int, irq_count); extern asmlinkage void ignore_sysret(void); /* Save actual FS/GS selectors and bases to current->thread */ void current_save_fsgs(void); #else /* X86_64 */ #ifdef CONFIG_STACKPROTECTOR /* * Make sure stack canary segment base is cached-aligned: * "For Intel Atom processors, avoid non zero segment base address * that is not aligned to cache line boundary at all cost." * (Optim Ref Manual Assembly/Compiler Coding Rule 15.) */ struct stack_canary { char __pad[20]; /* canary at %gs:20 */ unsigned long canary; }; DECLARE_PER_CPU_ALIGNED(struct stack_canary, stack_canary); #endif /* Per CPU softirq stack pointer */ DECLARE_PER_CPU(struct irq_stack *, softirq_stack_ptr); #endif /* X86_64 */ extern unsigned int fpu_kernel_xstate_size; extern unsigned int fpu_user_xstate_size; struct perf_event; struct thread_struct { /* Cached TLS descriptors: */ struct desc_struct tls_array[GDT_ENTRY_TLS_ENTRIES]; #ifdef CONFIG_X86_32 unsigned long sp0; #endif unsigned long sp; #ifdef CONFIG_X86_32 unsigned long sysenter_cs; #else unsigned short es; unsigned short ds; unsigned short fsindex; unsigned short gsindex; #endif #ifdef CONFIG_X86_64 unsigned long fsbase; unsigned long gsbase; #else /* * XXX: this could presumably be unsigned short. Alternatively, * 32-bit kernels could be taught to use fsindex instead. */ unsigned long fs; unsigned long gs; #endif /* Save middle states of ptrace breakpoints */ struct perf_event *ptrace_bps[HBP_NUM]; /* Debug status used for traps, single steps, etc... */ unsigned long virtual_dr6; /* Keep track of the exact dr7 value set by the user */ unsigned long ptrace_dr7; /* Fault info: */ unsigned long cr2; unsigned long trap_nr; unsigned long error_code; #ifdef CONFIG_VM86 /* Virtual 86 mode info */ struct vm86 *vm86; #endif /* IO permissions: */ struct io_bitmap *io_bitmap; /* * IOPL. Priviledge level dependent I/O permission which is * emulated via the I/O bitmap to prevent user space from disabling * interrupts. */ unsigned long iopl_emul; unsigned int sig_on_uaccess_err:1; /* Floating point and extended processor state */ struct fpu fpu; /* * WARNING: 'fpu' is dynamically-sized. It *MUST* be at * the end. */ }; /* Whitelist the FPU state from the task_struct for hardened usercopy. */ static inline void arch_thread_struct_whitelist(unsigned long *offset, unsigned long *size) { *offset = offsetof(struct thread_struct, fpu.state); *size = fpu_kernel_xstate_size; } static inline void native_load_sp0(unsigned long sp0) { this_cpu_write(cpu_tss_rw.x86_tss.sp0, sp0); } static __always_inline void native_swapgs(void) { #ifdef CONFIG_X86_64 asm volatile("swapgs" ::: "memory"); #endif } static inline unsigned long current_top_of_stack(void) { /* * We can't read directly from tss.sp0: sp0 on x86_32 is special in * and around vm86 mode and sp0 on x86_64 is special because of the * entry trampoline. */ return this_cpu_read_stable(cpu_current_top_of_stack); } static inline bool on_thread_stack(void) { return (unsigned long)(current_top_of_stack() - current_stack_pointer) < THREAD_SIZE; } #ifdef CONFIG_PARAVIRT_XXL #include <asm/paravirt.h> #else #define __cpuid native_cpuid static inline void load_sp0(unsigned long sp0) { native_load_sp0(sp0); } #endif /* CONFIG_PARAVIRT_XXL */ /* Free all resources held by a thread. */ extern void release_thread(struct task_struct *); unsigned long get_wchan(struct task_struct *p); /* * Generic CPUID function * clear %ecx since some cpus (Cyrix MII) do not set or clear %ecx * resulting in stale register contents being returned. */ static inline void cpuid(unsigned int op, unsigned int *eax, unsigned int *ebx, unsigned int *ecx, unsigned int *edx) { *eax = op; *ecx = 0; __cpuid(eax, ebx, ecx, edx); } /* Some CPUID calls want 'count' to be placed in ecx */ static inline void cpuid_count(unsigned int op, int count, unsigned int *eax, unsigned int *ebx, unsigned int *ecx, unsigned int *edx) { *eax = op; *ecx = count; __cpuid(eax, ebx, ecx, edx); } /* * CPUID functions returning a single datum */ static inline unsigned int cpuid_eax(unsigned int op) { unsigned int eax, ebx, ecx, edx; cpuid(op, &eax, &ebx, &ecx, &edx); return eax; } static inline unsigned int cpuid_ebx(unsigned int op) { unsigned int eax, ebx, ecx, edx; cpuid(op, &eax, &ebx, &ecx, &edx); return ebx; } static inline unsigned int cpuid_ecx(unsigned int op) { unsigned int eax, ebx, ecx, edx; cpuid(op, &eax, &ebx, &ecx, &edx); return ecx; } static inline unsigned int cpuid_edx(unsigned int op) { unsigned int eax, ebx, ecx, edx; cpuid(op, &eax, &ebx, &ecx, &edx); return edx; } extern void select_idle_routine(const struct cpuinfo_x86 *c); extern void amd_e400_c1e_apic_setup(void); extern unsigned long boot_option_idle_override; enum idle_boot_override {IDLE_NO_OVERRIDE=0, IDLE_HALT, IDLE_NOMWAIT, IDLE_POLL}; extern void enable_sep_cpu(void); extern int sysenter_setup(void); /* Defined in head.S */ extern struct desc_ptr early_gdt_descr; extern void switch_to_new_gdt(int); extern void load_direct_gdt(int); extern void load_fixmap_gdt(int); extern void load_percpu_segment(int); extern void cpu_init(void); extern void cpu_init_exception_handling(void); extern void cr4_init(void); static inline unsigned long get_debugctlmsr(void) { unsigned long debugctlmsr = 0; #ifndef CONFIG_X86_DEBUGCTLMSR if (boot_cpu_data.x86 < 6) return 0; #endif rdmsrl(MSR_IA32_DEBUGCTLMSR, debugctlmsr); return debugctlmsr; } static inline void update_debugctlmsr(unsigned long debugctlmsr) { #ifndef CONFIG_X86_DEBUGCTLMSR if (boot_cpu_data.x86 < 6) return; #endif wrmsrl(MSR_IA32_DEBUGCTLMSR, debugctlmsr); } extern void set_task_blockstep(struct task_struct *task, bool on); /* Boot loader type from the setup header: */ extern int bootloader_type; extern int bootloader_version; extern char ignore_fpu_irq; #define HAVE_ARCH_PICK_MMAP_LAYOUT 1 #define ARCH_HAS_PREFETCHW #define ARCH_HAS_SPINLOCK_PREFETCH #ifdef CONFIG_X86_32 # define BASE_PREFETCH "" # define ARCH_HAS_PREFETCH #else # define BASE_PREFETCH "prefetcht0 %P1" #endif /* * Prefetch instructions for Pentium III (+) and AMD Athlon (+) * * It's not worth to care about 3dnow prefetches for the K6 * because they are microcoded there and very slow. */ static inline void prefetch(const void *x) { alternative_input(BASE_PREFETCH, "prefetchnta %P1", X86_FEATURE_XMM, "m" (*(const char *)x)); } /* * 3dnow prefetch to get an exclusive cache line. * Useful for spinlocks to avoid one state transition in the * cache coherency protocol: */ static __always_inline void prefetchw(const void *x) { alternative_input(BASE_PREFETCH, "prefetchw %P1", X86_FEATURE_3DNOWPREFETCH, "m" (*(const char *)x)); } static inline void spin_lock_prefetch(const void *x) { prefetchw(x); } #define TOP_OF_INIT_STACK ((unsigned long)&init_stack + sizeof(init_stack) - \ TOP_OF_KERNEL_STACK_PADDING) #define task_top_of_stack(task) ((unsigned long)(task_pt_regs(task) + 1)) #define task_pt_regs(task) \ ({ \ unsigned long __ptr = (unsigned long)task_stack_page(task); \ __ptr += THREAD_SIZE - TOP_OF_KERNEL_STACK_PADDING; \ ((struct pt_regs *)__ptr) - 1; \ }) #ifdef CONFIG_X86_32 #define INIT_THREAD { \ .sp0 = TOP_OF_INIT_STACK, \ .sysenter_cs = __KERNEL_CS, \ } #define KSTK_ESP(task) (task_pt_regs(task)->sp) #else #define INIT_THREAD { } extern unsigned long KSTK_ESP(struct task_struct *task); #endif /* CONFIG_X86_64 */ extern void start_thread(struct pt_regs *regs, unsigned long new_ip, unsigned long new_sp); /* * This decides where the kernel will search for a free chunk of vm * space during mmap's. */ #define __TASK_UNMAPPED_BASE(task_size) (PAGE_ALIGN(task_size / 3)) #define TASK_UNMAPPED_BASE __TASK_UNMAPPED_BASE(TASK_SIZE_LOW) #define KSTK_EIP(task) (task_pt_regs(task)->ip) /* Get/set a process' ability to use the timestamp counter instruction */ #define GET_TSC_CTL(adr) get_tsc_mode((adr)) #define SET_TSC_CTL(val) set_tsc_mode((val)) extern int get_tsc_mode(unsigned long adr); extern int set_tsc_mode(unsigned int val); DECLARE_PER_CPU(u64, msr_misc_features_shadow); #ifdef CONFIG_CPU_SUP_AMD extern u16 amd_get_nb_id(int cpu); extern u32 amd_get_nodes_per_socket(void); #else static inline u16 amd_get_nb_id(int cpu) { return 0; } static inline u32 amd_get_nodes_per_socket(void) { return 0; } #endif static inline uint32_t hypervisor_cpuid_base(const char *sig, uint32_t leaves) { uint32_t base, eax, signature[3]; for (base = 0x40000000; base < 0x40010000; base += 0x100) { cpuid(base, &eax, &signature[0], &signature[1], &signature[2]); if (!memcmp(sig, signature, 12) && (leaves == 0 || ((eax - base) >= leaves))) return base; } return 0; } extern unsigned long arch_align_stack(unsigned long sp); void free_init_pages(const char *what, unsigned long begin, unsigned long end); extern void free_kernel_image_pages(const char *what, void *begin, void *end); void default_idle(void); #ifdef CONFIG_XEN bool xen_set_default_idle(void); #else #define xen_set_default_idle 0 #endif void stop_this_cpu(void *dummy); void microcode_check(void); enum l1tf_mitigations { L1TF_MITIGATION_OFF, L1TF_MITIGATION_FLUSH_NOWARN, L1TF_MITIGATION_FLUSH, L1TF_MITIGATION_FLUSH_NOSMT, L1TF_MITIGATION_FULL, L1TF_MITIGATION_FULL_FORCE }; extern enum l1tf_mitigations l1tf_mitigation; enum mds_mitigations { MDS_MITIGATION_OFF, MDS_MITIGATION_FULL, MDS_MITIGATION_VMWERV, }; #endif /* _ASM_X86_PROCESSOR_H */
1 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946 947 948 949 950 951 952 953 954 955 956 957 958 959 960 961 962 963 964 965 966 967 968 969 970 971 972 973 974 975 976 977 978 979 980 981 982 983 984 985 986 987 988 989 990 991 992 993 994 995 996 997 998 999 1000 1001 1002 1003 1004 1005 1006 1007 1008 1009 1010 1011 1012 1013 1014 1015 1016 1017 1018 1019 1020 1021 1022 1023 1024 1025 1026 1027 1028 1029 1030 1031 1032 1033 1034 1035 1036 1037 1038 1039 1040 1041 1042 1043 1044 1045 1046 1047 1048 1049 1050 1051 1052 1053 1054 1055 1056 1057 1058 1059 1060 1061 1062 1063 1064 1065 1066 1067 1068 1069 1070 1071 1072 1073 1074 1075 1076 1077 1078 1079 1080 1081 1082 1083 1084 1085 1086 1087 1088 1089 1090 1091 1092 1093 1094 1095 1096 1097 1098 1099 1100 1101 1102 1103 1104 1105 1106 1107 1108 1109 1110 1111 1112 1113 1114 1115 1116 1117 1118 1119 1120 1121 1122 1123 1124 1125 1126 1127 1128 1129 1130 1131 1132 1133 1134 1135 1136 1137 1138 1139 1140 1141 1142 1143 1144 1145 1146 1147 1148 1149 1150 1151 1152 1153 1154 1155 1156 1157 1158 1159 1160 1161 1162 1163 1164 1165 1166 1167 1168 1169 1170 1171 1172 1173 1174 1175 1176 1177 1178 1179 1180 1181 1182 1183 1184 1185 1186 1187 1188 1189 1190 1191 1192 1193 1194 1195 1196 1197 1198 1199 1200 1201 1202 1203 1204 1205 1206 1207 1208 1209 1210 1211 1212 1213 1214 1215 1216 1217 1218 1219 1220 1221 1222 1223 1224 1225 1226 1227 1228 1229 1230 1231 1232 1233 1234 1235 1236 1237 1238 1239 1240 1241 1242 1243 1244 1245 1246 1247 1248 1249 1250 1251 1252 1253 1254 1255 1256 1257 1258 1259 1260 1261 1262 1263 1264 1265 1266 1267 1268 1269 1270 1271 1272 1273 1274 1275 1276 1277 1278 1279 1280 1281 1282 1283 1284 1285 1286 1287 1288 1289 1290 1291 1292 1293 1294 1295 1296 1297 1298 1299 1300 1301 1302 1303 1304 1305 1306 1307 1308 1309 1310 1311 1312 1313 1314 1315 1316 1317 1318 1319 1320 1321 1322 1323 1324 1325 1326 1327 1328 1329 1330 1331 1332 1333 1334 1335 1336 1337 1338 1339 1340 1341 1342 1343 1344 1345 1346 1347 1348 1349 1350 1351 1352 1353 1354 1355 1356 1357 1358 1359 1360 1361 1362 1363 1364 1365 1366 1367 1368 1369 1370 1371 1372 1373 1374 1375 1376 1377 1378 1379 1380 1381 1382 1383 1384 1385 1386 1387 1388 1389 1390 1391 1392 1393 1394 1395 1396 1397 1398 1399 1400 1401 1402 1403 1404 1405 1406 1407 1408 1409 1410 1411 1412 1413 1414 1415 1416 1417 1418 1419 1420 1421 1422 1423 1424 1425 1426 1427 1428 1429 1430 1431 1432 1433 1434 1435 1436 1437 1438 1439 1440 1441 1442 1443 1444 1445 1446 1447 1448 1449 1450 1451 1452 1453 1454 1455 1456 1457 1458 1459 1460 1461 1462 1463 1464 1465 1466 1467 1468 1469 1470 1471 1472 1473 1474 1475 1476 1477 1478 1479 1480 1481 1482 1483 1484 1485 1486 1487 1488 1489 1490 1491 1492 1493 1494 1495 1496 1497 1498 1499 1500 1501 1502 1503 1504 1505 1506 1507 1508 1509 1510 1511 1512 1513 1514 1515 1516 1517 1518 1519 1520 1521 1522 1523 1524 1525 1526 1527 1528 1529 1530 1531 1532 1533 1534 1535 1536 1537 1538 1539 1540 1541 1542 1543 1544 1545 1546 1547 1548 1549 1550 1551 1552 1553 1554 1555 1556 1557 1558 1559 1560 1561 1562 1563 1564 1565 1566 1567 1568 1569 1570 1571 1572 1573 1574 1575 1576 1577 1578 1579 1580 1581 1582 1583 1584 1585 1586 1587 1588 1589 1590 1591 1592 1593 1594 1595 1596 1597 1598 1599 1600 1601 1602 1603 1604 1605 1606 1607 1608 1609 1610 1611 1612 1613 1614 1615 1616 1617 1618 1619 1620 1621 1622 1623 1624 1625 1626 1627 1628 1629 1630 1631 1632 1633 1634 1635 1636 1637 1638 1639 1640 1641 1642 1643 1644 1645 1646 1647 1648 1649 1650 1651 1652 1653 1654 1655 1656 1657 1658 1659 1660 1661 1662 1663 1664 1665 1666 1667 1668 1669 1670 1671 1672 1673 1674 1675 1676 1677 1678 1679 1680 1681 1682 1683 1684 1685 1686 1687 1688 1689 1690 1691 1692 1693 1694 1695 1696 1697 1698 1699 1700 1701 1702 1703 1704 1705 1706 1707 1708 1709 1710 1711 1712 1713 1714 1715 1716 1717 1718 1719 1720 1721 1722 1723 1724 1725 1726 1727 1728 1729 1730 1731 1732 1733 1734 1735 1736 1737 1738 1739 1740 1741 1742 1743 1744 1745 1746 1747 1748 1749 1750 1751 1752 1753 1754 1755 1756 1757 1758 1759 1760 1761 1762 1763 1764 1765 1766 1767 1768 1769 1770 1771 1772 1773 1774 1775 1776 1777 1778 1779 1780 1781 1782 1783 1784 1785 1786 1787 1788 1789 1790 1791 1792 1793 1794 1795 1796 1797 1798 1799 1800 1801 1802 1803 1804 1805 1806 1807 1808 1809 1810 1811 1812 1813 1814 1815 1816 1817 1818 1819 1820 1821 1822 1823 1824 1825 1826 1827 1828 1829 1830 1831 1832 1833 1834 1835 1836 1837 1838 1839 1840 1841 1842 1843 1844 1845 1846 1847 1848 1849 1850 1851 1852 1853 1854 1855 1856 1857 1858 1859 1860 1861 1862 1863 1864 1865 1866 1867 1868 1869 1870 1871 1872 1873 1874 1875 1876 1877 1878 1879 1880 1881 1882 1883 1884 1885 1886 1887 1888 1889 1890 1891 1892 1893 1894 1895 1896 1897 1898 1899 1900 1901 1902 1903 1904 1905 1906 1907 1908 1909 1910 1911 1912 1913 1914 1915 1916 1917 1918 1919 1920 1921 1922 1923 1924 1925 1926 1927 1928 1929 1930 1931 1932 1933 1934 1935 1936 1937 1938 1939 1940 1941 1942 1943 1944 1945 1946 1947 1948 1949 1950 1951 1952 1953 1954 1955 1956 1957 1958 1959 1960 1961 1962 1963 1964 1965 1966 1967 1968 1969 1970 1971 1972 1973 1974 1975 1976 1977 1978 1979 1980 1981 1982 1983 1984 1985 1986 1987 1988 1989 1990 1991 1992 1993 1994 1995 1996 1997 1998 1999 2000 2001 2002 2003 2004 2005 2006 2007 2008 2009 2010 2011 2012 2013 2014 2015 2016 2017 2018 2019 2020 2021 2022 2023 2024 2025 2026 2027 2028 2029 2030 2031 2032 2033 2034 2035 2036 2037 2038 2039 2040 2041 2042 2043 2044 2045 2046 2047 2048 2049 2050 2051 2052 2053 2054 2055 2056 2057 2058 2059 2060 2061 2062 2063 2064 2065 2066 2067 2068 2069 2070 2071 2072 2073 2074 2075 2076 2077 2078 2079 2080 2081 2082 2083 2084 2085 2086 2087 2088 2089 2090 2091 2092 2093 2094 2095 2096 2097 2098 2099 2100 2101 2102 2103 2104 2105 2106 2107 2108 2109 2110 2111 2112 2113 2114 2115 2116 2117 2118 2119 2120 2121 2122 2123 2124 2125 2126 2127 2128 2129 2130 2131 2132 2133 2134 2135 2136 2137 2138 2139 2140 2141 2142 2143 2144 2145 2146 2147 2148 2149 2150 2151 2152 2153 2154 2155 2156 2157 2158 2159 2160 2161 2162 2163 2164 2165 2166 2167 2168 2169 2170 2171 2172 2173 2174 2175 2176 2177 2178 2179 2180 2181 2182 2183 2184 2185 2186 2187 2188 2189 2190 2191 2192 2193 2194 2195 2196 2197 2198 2199 2200 2201 2202 2203 2204 2205 2206 2207 2208 2209 2210 2211 2212 2213 2214 2215 2216 2217 2218 2219 2220 2221 2222 2223 2224 2225 2226 2227 2228 2229 2230 2231 2232 2233 2234 2235 2236 2237 2238 2239 2240 2241 2242 2243 2244 2245 2246 2247 2248 2249 2250 2251 2252 2253 2254 2255 2256 2257 2258 2259 2260 2261 2262 2263 2264 2265 2266 2267 2268 2269 2270 2271 2272 2273 2274 2275 2276 2277 2278 2279 2280 2281 2282 2283 2284 2285 2286 2287 2288 2289 2290 2291 2292 2293 2294 2295 2296 2297 2298 2299 2300 2301 2302 2303 2304 2305 2306 2307 2308 2309 2310 2311 2312 2313 2314 2315 2316 2317 2318 2319 2320 2321 2322 2323 2324 2325 2326 2327 2328 2329 2330 2331 2332 2333 2334 2335 2336 2337 2338 2339 2340 2341 2342 2343 2344 2345 2346 2347 2348 2349 2350 2351 2352 2353 2354 2355 2356 2357 2358 2359 2360 2361 2362 2363 2364 2365 2366 2367 2368 2369 2370 2371 2372 2373 2374 2375 2376 2377 2378 2379 2380 2381 2382 2383 2384 2385 2386 2387 2388 2389 2390 2391 2392 2393 2394 2395 2396 2397 2398 2399 2400 2401 2402 2403 2404 2405 2406 2407 2408 2409 2410 2411 2412 2413 2414 2415 2416 2417 2418 2419 2420 2421 2422 2423 2424 2425 2426 2427 2428 2429 2430 2431 2432 2433 2434 2435 2436 2437 2438 2439 2440 2441 2442 2443 2444 2445 2446 2447 2448 2449 2450 2451 2452 2453 2454 2455 2456 2457 2458 2459 2460 2461 2462 2463 2464 2465 2466 2467 2468 2469 2470 2471 2472 2473 2474 2475 2476 2477 2478 2479 2480 2481 2482 2483 2484 2485 2486 2487 2488 2489 2490 2491 2492 2493 2494 2495 2496 2497 2498 2499 2500 2501 2502 2503 2504 2505 2506 2507 2508 2509 2510 2511 2512 2513 2514 2515 2516 2517 2518 2519 2520 2521 2522 2523 2524 2525 2526 2527 2528 2529 2530 2531 2532 2533 2534 2535 2536 2537 2538 2539 2540 2541 2542 2543 2544 2545 2546 2547 2548 2549 2550 2551 2552 2553 2554 2555 2556 2557 2558 2559 2560 2561 // SPDX-License-Identifier: GPL-2.0-or-later /* * Security plug functions * * Copyright (C) 2001 WireX Communications, Inc <chris@wirex.com> * Copyright (C) 2001-2002 Greg Kroah-Hartman <greg@kroah.com> * Copyright (C) 2001 Networks Associates Technology, Inc <ssmalley@nai.com> * Copyright (C) 2016 Mellanox Technologies */ #define pr_fmt(fmt) "LSM: " fmt #include <linux/bpf.h> #include <linux/capability.h> #include <linux/dcache.h> #include <linux/export.h> #include <linux/init.h> #include <linux/kernel.h> #include <linux/kernel_read_file.h> #include <linux/lsm_hooks.h> #include <linux/integrity.h> #include <linux/ima.h> #include <linux/evm.h> #include <linux/fsnotify.h> #include <linux/mman.h> #include <linux/mount.h> #include <linux/personality.h> #include <linux/backing-dev.h> #include <linux/string.h> #include <linux/msg.h> #include <net/flow.h> #define MAX_LSM_EVM_XATTR 2 /* How many LSMs were built into the kernel? */ #define LSM_COUNT (__end_lsm_info - __start_lsm_info) /* * These are descriptions of the reasons that can be passed to the * security_locked_down() LSM hook. Placing this array here allows * all security modules to use the same descriptions for auditing * purposes. */ const char *const lockdown_reasons[LOCKDOWN_CONFIDENTIALITY_MAX+1] = { [LOCKDOWN_NONE] = "none", [LOCKDOWN_MODULE_SIGNATURE] = "unsigned module loading", [LOCKDOWN_DEV_MEM] = "/dev/mem,kmem,port", [LOCKDOWN_EFI_TEST] = "/dev/efi_test access", [LOCKDOWN_KEXEC] = "kexec of unsigned images", [LOCKDOWN_HIBERNATION] = "hibernation", [LOCKDOWN_PCI_ACCESS] = "direct PCI access", [LOCKDOWN_IOPORT] = "raw io port access", [LOCKDOWN_MSR] = "raw MSR access", [LOCKDOWN_ACPI_TABLES] = "modifying ACPI tables", [LOCKDOWN_PCMCIA_CIS] = "direct PCMCIA CIS storage", [LOCKDOWN_TIOCSSERIAL] = "reconfiguration of serial port IO", [LOCKDOWN_MODULE_PARAMETERS] = "unsafe module parameters", [LOCKDOWN_MMIOTRACE] = "unsafe mmio", [LOCKDOWN_DEBUGFS] = "debugfs access", [LOCKDOWN_XMON_WR] = "xmon write access", [LOCKDOWN_BPF_WRITE_USER] = "use of bpf to write user RAM", [LOCKDOWN_INTEGRITY_MAX] = "integrity", [LOCKDOWN_KCORE] = "/proc/kcore access", [LOCKDOWN_KPROBES] = "use of kprobes", [LOCKDOWN_BPF_READ] = "use of bpf to read kernel RAM", [LOCKDOWN_PERF] = "unsafe use of perf", [LOCKDOWN_TRACEFS] = "use of tracefs", [LOCKDOWN_XMON_RW] = "xmon read and write access", [LOCKDOWN_CONFIDENTIALITY_MAX] = "confidentiality", }; struct security_hook_heads security_hook_heads __lsm_ro_after_init; static BLOCKING_NOTIFIER_HEAD(blocking_lsm_notifier_chain); static struct kmem_cache *lsm_file_cache; static struct kmem_cache *lsm_inode_cache; char *lsm_names; static struct lsm_blob_sizes blob_sizes __lsm_ro_after_init; /* Boot-time LSM user choice */ static __initdata const char *chosen_lsm_order; static __initdata const char *chosen_major_lsm; static __initconst const char * const builtin_lsm_order = CONFIG_LSM; /* Ordered list of LSMs to initialize. */ static __initdata struct lsm_info **ordered_lsms; static __initdata struct lsm_info *exclusive; static __initdata bool debug; #define init_debug(...) \ do { \ if (debug) \ pr_info(__VA_ARGS__); \ } while (0) static bool __init is_enabled(struct lsm_info *lsm) { if (!lsm->enabled) return false; return *lsm->enabled; } /* Mark an LSM's enabled flag. */ static int lsm_enabled_true __initdata = 1; static int lsm_enabled_false __initdata = 0; static void __init set_enabled(struct lsm_info *lsm, bool enabled) { /* * When an LSM hasn't configured an enable variable, we can use * a hard-coded location for storing the default enabled state. */ if (!lsm->enabled) { if (enabled) lsm->enabled = &lsm_enabled_true; else lsm->enabled = &lsm_enabled_false; } else if (lsm->enabled == &lsm_enabled_true) { if (!enabled) lsm->enabled = &lsm_enabled_false; } else if (lsm->enabled == &lsm_enabled_false) { if (enabled) lsm->enabled = &lsm_enabled_true; } else { *lsm->enabled = enabled; } } /* Is an LSM already listed in the ordered LSMs list? */ static bool __init exists_ordered_lsm(struct lsm_info *lsm) { struct lsm_info **check; for (check = ordered_lsms; *check; check++) if (*check == lsm) return true; return false; } /* Append an LSM to the list of ordered LSMs to initialize. */ static int last_lsm __initdata; static void __init append_ordered_lsm(struct lsm_info *lsm, const char *from) { /* Ignore duplicate selections. */ if (exists_ordered_lsm(lsm)) return; if (WARN(last_lsm == LSM_COUNT, "%s: out of LSM slots!?\n", from)) return; /* Enable this LSM, if it is not already set. */ if (!lsm->enabled) lsm->enabled = &lsm_enabled_true; ordered_lsms[last_lsm++] = lsm; init_debug("%s ordering: %s (%sabled)\n", from, lsm->name, is_enabled(lsm) ? "en" : "dis"); } /* Is an LSM allowed to be initialized? */ static bool __init lsm_allowed(struct lsm_info *lsm) { /* Skip if the LSM is disabled. */ if (!is_enabled(lsm)) return false; /* Not allowed if another exclusive LSM already initialized. */ if ((lsm->flags & LSM_FLAG_EXCLUSIVE) && exclusive) { init_debug("exclusive disabled: %s\n", lsm->name); return false; } return true; } static void __init lsm_set_blob_size(int *need, int *lbs) { int offset; if (*need > 0) { offset = *lbs; *lbs += *need; *need = offset; } } static void __init lsm_set_blob_sizes(struct lsm_blob_sizes *needed) { if (!needed) return; lsm_set_blob_size(&needed->lbs_cred, &blob_sizes.lbs_cred); lsm_set_blob_size(&needed->lbs_file, &blob_sizes.lbs_file); /* * The inode blob gets an rcu_head in addition to * what the modules might need. */ if (needed->lbs_inode && blob_sizes.lbs_inode == 0) blob_sizes.lbs_inode = sizeof(struct rcu_head); lsm_set_blob_size(&needed->lbs_inode, &blob_sizes.lbs_inode); lsm_set_blob_size(&needed->lbs_ipc, &blob_sizes.lbs_ipc); lsm_set_blob_size(&needed->lbs_msg_msg, &blob_sizes.lbs_msg_msg); lsm_set_blob_size(&needed->lbs_task, &blob_sizes.lbs_task); } /* Prepare LSM for initialization. */ static void __init prepare_lsm(struct lsm_info *lsm) { int enabled = lsm_allowed(lsm); /* Record enablement (to handle any following exclusive LSMs). */ set_enabled(lsm, enabled); /* If enabled, do pre-initialization work. */ if (enabled) { if ((lsm->flags & LSM_FLAG_EXCLUSIVE) && !exclusive) { exclusive = lsm; init_debug("exclusive chosen: %s\n", lsm->name); } lsm_set_blob_sizes(lsm->blobs); } } /* Initialize a given LSM, if it is enabled. */ static void __init initialize_lsm(struct lsm_info *lsm) { if (is_enabled(lsm)) { int ret; init_debug("initializing %s\n", lsm->name); ret = lsm->init(); WARN(ret, "%s failed to initialize: %d\n", lsm->name, ret); } } /* Populate ordered LSMs list from comma-separated LSM name list. */ static void __init ordered_lsm_parse(const char *order, const char *origin) { struct lsm_info *lsm; char *sep, *name, *next; /* LSM_ORDER_FIRST is always first. */ for (lsm = __start_lsm_info; lsm < __end_lsm_info; lsm++) { if (lsm->order == LSM_ORDER_FIRST) append_ordered_lsm(lsm, "first"); } /* Process "security=", if given. */ if (chosen_major_lsm) { struct lsm_info *major; /* * To match the original "security=" behavior, this * explicitly does NOT fallback to another Legacy Major * if the selected one was separately disabled: disable * all non-matching Legacy Major LSMs. */ for (major = __start_lsm_info; major < __end_lsm_info; major++) { if ((major->flags & LSM_FLAG_LEGACY_MAJOR) && strcmp(major->name, chosen_major_lsm) != 0) { set_enabled(major, false); init_debug("security=%s disabled: %s\n", chosen_major_lsm, major->name); } } } sep = kstrdup(order, GFP_KERNEL); next = sep; /* Walk the list, looking for matching LSMs. */ while ((name = strsep(&next, ",")) != NULL) { bool found = false; for (lsm = __start_lsm_info; lsm < __end_lsm_info; lsm++) { if (lsm->order == LSM_ORDER_MUTABLE && strcmp(lsm->name, name) == 0) { append_ordered_lsm(lsm, origin); found = true; } } if (!found) init_debug("%s ignored: %s\n", origin, name); } /* Process "security=", if given. */ if (chosen_major_lsm) { for (lsm = __start_lsm_info; lsm < __end_lsm_info; lsm++) { if (exists_ordered_lsm(lsm)) continue; if (strcmp(lsm->name, chosen_major_lsm) == 0) append_ordered_lsm(lsm, "security="); } } /* Disable all LSMs not in the ordered list. */ for (lsm = __start_lsm_info; lsm < __end_lsm_info; lsm++) { if (exists_ordered_lsm(lsm)) continue; set_enabled(lsm, false); init_debug("%s disabled: %s\n", origin, lsm->name); } kfree(sep); } static void __init lsm_early_cred(struct cred *cred); static void __init lsm_early_task(struct task_struct *task); static int lsm_append(const char *new, char **result); static void __init ordered_lsm_init(void) { struct lsm_info **lsm; ordered_lsms = kcalloc(LSM_COUNT + 1, sizeof(*ordered_lsms), GFP_KERNEL); if (chosen_lsm_order) { if (chosen_major_lsm) { pr_info("security= is ignored because it is superseded by lsm=\n"); chosen_major_lsm = NULL; } ordered_lsm_parse(chosen_lsm_order, "cmdline"); } else ordered_lsm_parse(builtin_lsm_order, "builtin"); for (lsm = ordered_lsms; *lsm; lsm++) prepare_lsm(*lsm); init_debug("cred blob size = %d\n", blob_sizes.lbs_cred); init_debug("file blob size = %d\n", blob_sizes.lbs_file); init_debug("inode blob size = %d\n", blob_sizes.lbs_inode); init_debug("ipc blob size = %d\n", blob_sizes.lbs_ipc); init_debug("msg_msg blob size = %d\n", blob_sizes.lbs_msg_msg); init_debug("task blob size = %d\n", blob_sizes.lbs_task); /* * Create any kmem_caches needed for blobs */ if (blob_sizes.lbs_file) lsm_file_cache = kmem_cache_create("lsm_file_cache", blob_sizes.lbs_file, 0, SLAB_PANIC, NULL); if (blob_sizes.lbs_inode) lsm_inode_cache = kmem_cache_create("lsm_inode_cache", blob_sizes.lbs_inode, 0, SLAB_PANIC, NULL); lsm_early_cred((struct cred *) current->cred); lsm_early_task(current); for (lsm = ordered_lsms; *lsm; lsm++) initialize_lsm(*lsm); kfree(ordered_lsms); } int __init early_security_init(void) { int i; struct hlist_head *list = (struct hlist_head *) &security_hook_heads; struct lsm_info *lsm; for (i = 0; i < sizeof(security_hook_heads) / sizeof(struct hlist_head); i++) INIT_HLIST_HEAD(&list[i]); for (lsm = __start_early_lsm_info; lsm < __end_early_lsm_info; lsm++) { if (!lsm->enabled) lsm->enabled = &lsm_enabled_true; prepare_lsm(lsm); initialize_lsm(lsm); } return 0; } /** * security_init - initializes the security framework * * This should be called early in the kernel initialization sequence. */ int __init security_init(void) { struct lsm_info *lsm; pr_info("Security Framework initializing\n"); /* * Append the names of the early LSM modules now that kmalloc() is * available */ for (lsm = __start_early_lsm_info; lsm < __end_early_lsm_info; lsm++) { if (lsm->enabled) lsm_append(lsm->name, &lsm_names); } /* Load LSMs in specified order. */ ordered_lsm_init(); return 0; } /* Save user chosen LSM */ static int __init choose_major_lsm(char *str) { chosen_major_lsm = str; return 1; } __setup("security=", choose_major_lsm); /* Explicitly choose LSM initialization order. */ static int __init choose_lsm_order(char *str) { chosen_lsm_order = str; return 1; } __setup("lsm=", choose_lsm_order); /* Enable LSM order debugging. */ static int __init enable_debug(char *str) { debug = true; return 1; } __setup("lsm.debug", enable_debug); static bool match_last_lsm(const char *list, const char *lsm) { const char *last; if (WARN_ON(!list || !lsm)) return false; last = strrchr(list, ','); if (last) /* Pass the comma, strcmp() will check for '\0' */ last++; else last = list; return !strcmp(last, lsm); } static int lsm_append(const char *new, char **result) { char *cp; if (*result == NULL) { *result = kstrdup(new, GFP_KERNEL); if (*result == NULL) return -ENOMEM; } else { /* Check if it is the last registered name */ if (match_last_lsm(*result, new)) return 0; cp = kasprintf(GFP_KERNEL, "%s,%s", *result, new); if (cp == NULL) return -ENOMEM; kfree(*result); *result = cp; } return 0; } /** * security_add_hooks - Add a modules hooks to the hook lists. * @hooks: the hooks to add * @count: the number of hooks to add * @lsm: the name of the security module * * Each LSM has to register its hooks with the infrastructure. */ void __init security_add_hooks(struct security_hook_list *hooks, int count, char *lsm) { int i; for (i = 0; i < count; i++) { hooks[i].lsm = lsm; hlist_add_tail_rcu(&hooks[i].list, hooks[i].head); } /* * Don't try to append during early_security_init(), we'll come back * and fix this up afterwards. */ if (slab_is_available()) { if (lsm_append(lsm, &lsm_names) < 0) panic("%s - Cannot get early memory.\n", __func__); } } int call_blocking_lsm_notifier(enum lsm_event event, void *data) { return blocking_notifier_call_chain(&blocking_lsm_notifier_chain, event, data); } EXPORT_SYMBOL(call_blocking_lsm_notifier); int register_blocking_lsm_notifier(struct notifier_block *nb) { return blocking_notifier_chain_register(&blocking_lsm_notifier_chain, nb); } EXPORT_SYMBOL(register_blocking_lsm_notifier); int unregister_blocking_lsm_notifier(struct notifier_block *nb) { return blocking_notifier_chain_unregister(&blocking_lsm_notifier_chain, nb); } EXPORT_SYMBOL(unregister_blocking_lsm_notifier); /** * lsm_cred_alloc - allocate a composite cred blob * @cred: the cred that needs a blob * @gfp: allocation type * * Allocate the cred blob for all the modules * * Returns 0, or -ENOMEM if memory can't be allocated. */ static int lsm_cred_alloc(struct cred *cred, gfp_t gfp) { if (blob_sizes.lbs_cred == 0) { cred->security = NULL; return 0; } cred->security = kzalloc(blob_sizes.lbs_cred, gfp); if (cred->security == NULL) return -ENOMEM; return 0; } /** * lsm_early_cred - during initialization allocate a composite cred blob * @cred: the cred that needs a blob * * Allocate the cred blob for all the modules */ static void __init lsm_early_cred(struct cred *cred) { int rc = lsm_cred_alloc(cred, GFP_KERNEL); if (rc) panic("%s: Early cred alloc failed.\n", __func__); } /** * lsm_file_alloc - allocate a composite file blob * @file: the file that needs a blob * * Allocate the file blob for all the modules * * Returns 0, or -ENOMEM if memory can't be allocated. */ static int lsm_file_alloc(struct file *file) { if (!lsm_file_cache) { file->f_security = NULL; return 0; } file->f_security = kmem_cache_zalloc(lsm_file_cache, GFP_KERNEL); if (file->f_security == NULL) return -ENOMEM; return 0; } /** * lsm_inode_alloc - allocate a composite inode blob * @inode: the inode that needs a blob * * Allocate the inode blob for all the modules * * Returns 0, or -ENOMEM if memory can't be allocated. */ int lsm_inode_alloc(struct inode *inode) { if (!lsm_inode_cache) { inode->i_security = NULL; return 0; } inode->i_security = kmem_cache_zalloc(lsm_inode_cache, GFP_NOFS); if (inode->i_security == NULL) return -ENOMEM; return 0; } /** * lsm_task_alloc - allocate a composite task blob * @task: the task that needs a blob * * Allocate the task blob for all the modules * * Returns 0, or -ENOMEM if memory can't be allocated. */ static int lsm_task_alloc(struct task_struct *task) { if (blob_sizes.lbs_task == 0) { task->security = NULL; return 0; } task->security = kzalloc(blob_sizes.lbs_task, GFP_KERNEL); if (task->security == NULL) return -ENOMEM; return 0; } /** * lsm_ipc_alloc - allocate a composite ipc blob * @kip: the ipc that needs a blob * * Allocate the ipc blob for all the modules * * Returns 0, or -ENOMEM if memory can't be allocated. */ static int lsm_ipc_alloc(struct kern_ipc_perm *kip) { if (blob_sizes.lbs_ipc == 0) { kip->security = NULL; return 0; } kip->security = kzalloc(blob_sizes.lbs_ipc, GFP_KERNEL); if (kip->security == NULL) return -ENOMEM; return 0; } /** * lsm_msg_msg_alloc - allocate a composite msg_msg blob * @mp: the msg_msg that needs a blob * * Allocate the ipc blob for all the modules * * Returns 0, or -ENOMEM if memory can't be allocated. */ static int lsm_msg_msg_alloc(struct msg_msg *mp) { if (blob_sizes.lbs_msg_msg == 0) { mp->security = NULL; return 0; } mp->security = kzalloc(blob_sizes.lbs_msg_msg, GFP_KERNEL); if (mp->security == NULL) return -ENOMEM; return 0; } /** * lsm_early_task - during initialization allocate a composite task blob * @task: the task that needs a blob * * Allocate the task blob for all the modules */ static void __init lsm_early_task(struct task_struct *task) { int rc = lsm_task_alloc(task); if (rc) panic("%s: Early task alloc failed.\n", __func__); } /* * The default value of the LSM hook is defined in linux/lsm_hook_defs.h and * can be accessed with: * * LSM_RET_DEFAULT(<hook_name>) * * The macros below define static constants for the default value of each * LSM hook. */ #define LSM_RET_DEFAULT(NAME) (NAME##_default) #define DECLARE_LSM_RET_DEFAULT_void(DEFAULT, NAME) #define DECLARE_LSM_RET_DEFAULT_int(DEFAULT, NAME) \ static const int LSM_RET_DEFAULT(NAME) = (DEFAULT); #define LSM_HOOK(RET, DEFAULT, NAME, ...) \ DECLARE_LSM_RET_DEFAULT_##RET(DEFAULT, NAME) #include <linux/lsm_hook_defs.h> #undef LSM_HOOK /* * Hook list operation macros. * * call_void_hook: * This is a hook that does not return a value. * * call_int_hook: * This is a hook that returns a value. */ #define call_void_hook(FUNC, ...) \ do { \ struct security_hook_list *P; \ \ hlist_for_each_entry(P, &security_hook_heads.FUNC, list) \ P->hook.FUNC(__VA_ARGS__); \ } while (0) #define call_int_hook(FUNC, IRC, ...) ({ \ int RC = IRC; \ do { \ struct security_hook_list *P; \ \ hlist_for_each_entry(P, &security_hook_heads.FUNC, list) { \ RC = P->hook.FUNC(__VA_ARGS__); \ if (RC != 0) \ break; \ } \ } while (0); \ RC; \ }) /* Security operations */ int security_binder_set_context_mgr(struct task_struct *mgr) { return call_int_hook(binder_set_context_mgr, 0, mgr); } int security_binder_transaction(struct task_struct *from, struct task_struct *to) { return call_int_hook(binder_transaction, 0, from, to); } int security_binder_transfer_binder(struct task_struct *from, struct task_struct *to) { return call_int_hook(binder_transfer_binder, 0, from, to); } int security_binder_transfer_file(struct task_struct *from, struct task_struct *to, struct file *file) { return call_int_hook(binder_transfer_file, 0, from, to, file); } int security_ptrace_access_check(struct task_struct *child, unsigned int mode) { return call_int_hook(ptrace_access_check, 0, child, mode); } int security_ptrace_traceme(struct task_struct *parent) { return call_int_hook(ptrace_traceme, 0, parent); } int security_capget(struct task_struct *target, kernel_cap_t *effective, kernel_cap_t *inheritable, kernel_cap_t *permitted) { return call_int_hook(capget, 0, target, effective, inheritable, permitted); } int security_capset(struct cred *new, const struct cred *old, const kernel_cap_t *effective, const kernel_cap_t *inheritable, const kernel_cap_t *permitted) { return call_int_hook(capset, 0, new, old, effective, inheritable, permitted); } int security_capable(const struct cred *cred, struct user_namespace *ns, int cap, unsigned int opts) { return call_int_hook(capable, 0, cred, ns, cap, opts); } int security_quotactl(int cmds, int type, int id, struct super_block *sb) { return call_int_hook(quotactl, 0, cmds, type, id, sb); } int security_quota_on(struct dentry *dentry) { return call_int_hook(quota_on, 0, dentry); } int security_syslog(int type) { return call_int_hook(syslog, 0, type); } int security_settime64(const struct timespec64 *ts, const struct timezone *tz) { return call_int_hook(settime, 0, ts, tz); } int security_vm_enough_memory_mm(struct mm_struct *mm, long pages) { struct security_hook_list *hp; int cap_sys_admin = 1; int rc; /* * The module will respond with a positive value if * it thinks the __vm_enough_memory() call should be * made with the cap_sys_admin set. If all of the modules * agree that it should be set it will. If any module * thinks it should not be set it won't. */ hlist_for_each_entry(hp, &security_hook_heads.vm_enough_memory, list) { rc = hp->hook.vm_enough_memory(mm, pages); if (rc <= 0) { cap_sys_admin = 0; break; } } return __vm_enough_memory(mm, pages, cap_sys_admin); } int security_bprm_creds_for_exec(struct linux_binprm *bprm) { return call_int_hook(bprm_creds_for_exec, 0, bprm); } int security_bprm_creds_from_file(struct linux_binprm *bprm, struct file *file) { return call_int_hook(bprm_creds_from_file, 0, bprm, file); } int security_bprm_check(struct linux_binprm *bprm) { int ret; ret = call_int_hook(bprm_check_security, 0, bprm); if (ret) return ret; return ima_bprm_check(bprm); } void security_bprm_committing_creds(struct linux_binprm *bprm) { call_void_hook(bprm_committing_creds, bprm); } void security_bprm_committed_creds(struct linux_binprm *bprm) { call_void_hook(bprm_committed_creds, bprm); } int security_fs_context_dup(struct fs_context *fc, struct fs_context *src_fc) { return call_int_hook(fs_context_dup, 0, fc, src_fc); } int security_fs_context_parse_param(struct fs_context *fc, struct fs_parameter *param) { return call_int_hook(fs_context_parse_param, -ENOPARAM, fc, param); } int security_sb_alloc(struct super_block *sb) { return call_int_hook(sb_alloc_security, 0, sb); } void security_sb_free(struct super_block *sb) { call_void_hook(sb_free_security, sb); } void security_free_mnt_opts(void **mnt_opts) { if (!*mnt_opts) return; call_void_hook(sb_free_mnt_opts, *mnt_opts); *mnt_opts = NULL; } EXPORT_SYMBOL(security_free_mnt_opts); int security_sb_eat_lsm_opts(char *options, void **mnt_opts) { return call_int_hook(sb_eat_lsm_opts, 0, options, mnt_opts); } EXPORT_SYMBOL(security_sb_eat_lsm_opts); int security_sb_remount(struct super_block *sb, void *mnt_opts) { return call_int_hook(sb_remount, 0, sb, mnt_opts); } EXPORT_SYMBOL(security_sb_remount); int security_sb_kern_mount(struct super_block *sb) { return call_int_hook(sb_kern_mount, 0, sb); } int security_sb_show_options(struct seq_file *m, struct super_block *sb) { return call_int_hook(sb_show_options, 0, m, sb); } int security_sb_statfs(struct dentry *dentry) { return call_int_hook(sb_statfs, 0, dentry); } int security_sb_mount(const char *dev_name, const struct path *path, const char *type, unsigned long flags, void *data) { return call_int_hook(sb_mount, 0, dev_name, path, type, flags, data); } int security_sb_umount(struct vfsmount *mnt, int flags) { return call_int_hook(sb_umount, 0, mnt, flags); } int security_sb_pivotroot(const struct path *old_path, const struct path *new_path) { return call_int_hook(sb_pivotroot, 0, old_path, new_path); } int security_sb_set_mnt_opts(struct super_block *sb, void *mnt_opts, unsigned long kern_flags, unsigned long *set_kern_flags) { return call_int_hook(sb_set_mnt_opts, mnt_opts ? -EOPNOTSUPP : 0, sb, mnt_opts, kern_flags, set_kern_flags); } EXPORT_SYMBOL(security_sb_set_mnt_opts); int security_sb_clone_mnt_opts(const struct super_block *oldsb, struct super_block *newsb, unsigned long kern_flags, unsigned long *set_kern_flags) { return call_int_hook(sb_clone_mnt_opts, 0, oldsb, newsb, kern_flags, set_kern_flags); } EXPORT_SYMBOL(security_sb_clone_mnt_opts); int security_add_mnt_opt(const char *option, const char *val, int len, void **mnt_opts) { return call_int_hook(sb_add_mnt_opt, -EINVAL, option, val, len, mnt_opts); } EXPORT_SYMBOL(security_add_mnt_opt); int security_move_mount(const struct path *from_path, const struct path *to_path) { return call_int_hook(move_mount, 0, from_path, to_path); } int security_path_notify(const struct path *path, u64 mask, unsigned int obj_type) { return call_int_hook(path_notify, 0, path, mask, obj_type); } int security_inode_alloc(struct inode *inode) { int rc = lsm_inode_alloc(inode); if (unlikely(rc)) return rc; rc = call_int_hook(inode_alloc_security, 0, inode); if (unlikely(rc)) security_inode_free(inode); return rc; } static void inode_free_by_rcu(struct rcu_head *head) { /* * The rcu head is at the start of the inode blob */ kmem_cache_free(lsm_inode_cache, head); } void security_inode_free(struct inode *inode) { integrity_inode_free(inode); call_void_hook(inode_free_security, inode); /* * The inode may still be referenced in a path walk and * a call to security_inode_permission() can be made * after inode_free_security() is called. Ideally, the VFS * wouldn't do this, but fixing that is a much harder * job. For now, simply free the i_security via RCU, and * leave the current inode->i_security pointer intact. * The inode will be freed after the RCU grace period too. */ if (inode->i_security) call_rcu((struct rcu_head *)inode->i_security, inode_free_by_rcu); } int security_dentry_init_security(struct dentry *dentry, int mode, const struct qstr *name, void **ctx, u32 *ctxlen) { return call_int_hook(dentry_init_security, -EOPNOTSUPP, dentry, mode, name, ctx, ctxlen); } EXPORT_SYMBOL(security_dentry_init_security); int security_dentry_create_files_as(struct dentry *dentry, int mode, struct qstr *name, const struct cred *old, struct cred *new) { return call_int_hook(dentry_create_files_as, 0, dentry, mode, name, old, new); } EXPORT_SYMBOL(security_dentry_create_files_as); int security_inode_init_security(struct inode *inode, struct inode *dir, const struct qstr *qstr, const initxattrs initxattrs, void *fs_data) { struct xattr new_xattrs[MAX_LSM_EVM_XATTR + 1]; struct xattr *lsm_xattr, *evm_xattr, *xattr; int ret; if (unlikely(IS_PRIVATE(inode))) return 0; if (!initxattrs) return call_int_hook(inode_init_security, -EOPNOTSUPP, inode, dir, qstr, NULL, NULL, NULL); memset(new_xattrs, 0, sizeof(new_xattrs)); lsm_xattr = new_xattrs; ret = call_int_hook(inode_init_security, -EOPNOTSUPP, inode, dir, qstr, &lsm_xattr->name, &lsm_xattr->value, &lsm_xattr->value_len); if (ret) goto out; evm_xattr = lsm_xattr + 1; ret = evm_inode_init_security(inode, lsm_xattr, evm_xattr); if (ret) goto out; ret = initxattrs(inode, new_xattrs, fs_data); out: for (xattr = new_xattrs; xattr->value != NULL; xattr++) kfree(xattr->value); return (ret == -EOPNOTSUPP) ? 0 : ret; } EXPORT_SYMBOL(security_inode_init_security); int security_old_inode_init_security(struct inode *inode, struct inode *dir, const struct qstr *qstr, const char **name, void **value, size_t *len) { if (unlikely(IS_PRIVATE(inode))) return -EOPNOTSUPP; return call_int_hook(inode_init_security, -EOPNOTSUPP, inode, dir, qstr, name, value, len); } EXPORT_SYMBOL(security_old_inode_init_security); #ifdef CONFIG_SECURITY_PATH int security_path_mknod(const struct path *dir, struct dentry *dentry, umode_t mode, unsigned int dev) { if (unlikely(IS_PRIVATE(d_backing_inode(dir->dentry)))) return 0; return call_int_hook(path_mknod, 0, dir, dentry, mode, dev); } EXPORT_SYMBOL(security_path_mknod); int security_path_mkdir(const struct path *dir, struct dentry *dentry, umode_t mode) { if (unlikely(IS_PRIVATE(d_backing_inode(dir->dentry)))) return 0; return call_int_hook(path_mkdir, 0, dir, dentry, mode); } EXPORT_SYMBOL(security_path_mkdir); int security_path_rmdir(const struct path *dir, struct dentry *dentry) { if (unlikely(IS_PRIVATE(d_backing_inode(dir->dentry)))) return 0; return call_int_hook(path_rmdir, 0, dir, dentry); } int security_path_unlink(const struct path *dir, struct dentry *dentry) { if (unlikely(IS_PRIVATE(d_backing_inode(dir->dentry)))) return 0; return call_int_hook(path_unlink, 0, dir, dentry); } EXPORT_SYMBOL(security_path_unlink); int security_path_symlink(const struct path *dir, struct dentry *dentry, const char *old_name) { if (unlikely(IS_PRIVATE(d_backing_inode(dir->dentry)))) return 0; return call_int_hook(path_symlink, 0, dir, dentry, old_name); } int security_path_link(struct dentry *old_dentry, const struct path *new_dir, struct dentry *new_dentry) { if (unlikely(IS_PRIVATE(d_backing_inode(old_dentry)))) return 0; return call_int_hook(path_link, 0, old_dentry, new_dir, new_dentry); } int security_path_rename(const struct path *old_dir, struct dentry *old_dentry, const struct path *new_dir, struct dentry *new_dentry, unsigned int flags) { if (unlikely(IS_PRIVATE(d_backing_inode(old_dentry)) || (d_is_positive(new_dentry) && IS_PRIVATE(d_backing_inode(new_dentry))))) return 0; if (flags & RENAME_EXCHANGE) { int err = call_int_hook(path_rename, 0, new_dir, new_dentry, old_dir, old_dentry); if (err) return err; } return call_int_hook(path_rename, 0, old_dir, old_dentry, new_dir, new_dentry); } EXPORT_SYMBOL(security_path_rename); int security_path_truncate(const struct path *path) { if (unlikely(IS_PRIVATE(d_backing_inode(path->dentry)))) return 0; return call_int_hook(path_truncate, 0, path); } int security_path_chmod(const struct path *path, umode_t mode) { if (unlikely(IS_PRIVATE(d_backing_inode(path->dentry)))) return 0; return call_int_hook(path_chmod, 0, path, mode); } int security_path_chown(const struct path *path, kuid_t uid, kgid_t gid) { if (unlikely(IS_PRIVATE(d_backing_inode(path->dentry)))) return 0; return call_int_hook(path_chown, 0, path, uid, gid); } int security_path_chroot(const struct path *path) { return call_int_hook(path_chroot, 0, path); } #endif int security_inode_create(struct inode *dir, struct dentry *dentry, umode_t mode) { if (unlikely(IS_PRIVATE(dir))) return 0; return call_int_hook(inode_create, 0, dir, dentry, mode); } EXPORT_SYMBOL_GPL(security_inode_create); int security_inode_link(struct dentry *old_dentry, struct inode *dir, struct dentry *new_dentry) { if (unlikely(IS_PRIVATE(d_backing_inode(old_dentry)))) return 0; return call_int_hook(inode_link, 0, old_dentry, dir, new_dentry); } int security_inode_unlink(struct inode *dir, struct dentry *dentry) { if (unlikely(IS_PRIVATE(d_backing_inode(dentry)))) return 0; return call_int_hook(inode_unlink, 0, dir, dentry); } int security_inode_symlink(struct inode *dir, struct dentry *dentry, const char *old_name) { if (unlikely(IS_PRIVATE(dir))) return 0; return call_int_hook(inode_symlink, 0, dir, dentry, old_name); } int security_inode_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode) { if (unlikely(IS_PRIVATE(dir))) return 0; return call_int_hook(inode_mkdir, 0, dir, dentry, mode); } EXPORT_SYMBOL_GPL(security_inode_mkdir); int security_inode_rmdir(struct inode *dir, struct dentry *dentry) { if (unlikely(IS_PRIVATE(d_backing_inode(dentry)))) return 0; return call_int_hook(inode_rmdir, 0, dir, dentry); } int security_inode_mknod(struct inode *dir, struct dentry *dentry, umode_t mode, dev_t dev) { if (unlikely(IS_PRIVATE(dir))) return 0; return call_int_hook(inode_mknod, 0, dir, dentry, mode, dev); } int security_inode_rename(struct inode *old_dir, struct dentry *old_dentry, struct inode *new_dir, struct dentry *new_dentry, unsigned int flags) { if (unlikely(IS_PRIVATE(d_backing_inode(old_dentry)) || (d_is_positive(new_dentry) && IS_PRIVATE(d_backing_inode(new_dentry))))) return 0; if (flags & RENAME_EXCHANGE) { int err = call_int_hook(inode_rename, 0, new_dir, new_dentry, old_dir, old_dentry); if (err) return err; } return call_int_hook(inode_rename, 0, old_dir, old_dentry, new_dir, new_dentry); } int security_inode_readlink(struct dentry *dentry) { if (unlikely(IS_PRIVATE(d_backing_inode(dentry)))) return 0; return call_int_hook(inode_readlink, 0, dentry); } int security_inode_follow_link(struct dentry *dentry, struct inode *inode, bool rcu) { if (unlikely(IS_PRIVATE(inode))) return 0; return call_int_hook(inode_follow_link, 0, dentry, inode, rcu); } int security_inode_permission(struct inode *inode, int mask) { if (unlikely(IS_PRIVATE(inode))) return 0; return call_int_hook(inode_permission, 0, inode, mask); } int security_inode_setattr(struct dentry *dentry, struct iattr *attr) { int ret; if (unlikely(IS_PRIVATE(d_backing_inode(dentry)))) return 0; ret = call_int_hook(inode_setattr, 0, dentry, attr); if (ret) return ret; return evm_inode_setattr(dentry, attr); } EXPORT_SYMBOL_GPL(security_inode_setattr); int security_inode_getattr(const struct path *path) { if (unlikely(IS_PRIVATE(d_backing_inode(path->dentry)))) return 0; return call_int_hook(inode_getattr, 0, path); } int security_inode_setxattr(struct dentry *dentry, const char *name, const void *value, size_t size, int flags) { int ret; if (unlikely(IS_PRIVATE(d_backing_inode(dentry)))) return 0; /* * SELinux and Smack integrate the cap call, * so assume that all LSMs supplying this call do so. */ ret = call_int_hook(inode_setxattr, 1, dentry, name, value, size, flags); if (ret == 1) ret = cap_inode_setxattr(dentry, name, value, size, flags); if (ret) return ret; ret = ima_inode_setxattr(dentry, name, value, size); if (ret) return ret; return evm_inode_setxattr(dentry, name, value, size); } void security_inode_post_setxattr(struct dentry *dentry, const char *name, const void *value, size_t size, int flags) { if (unlikely(IS_PRIVATE(d_backing_inode(dentry)))) return; call_void_hook(inode_post_setxattr, dentry, name, value, size, flags); evm_inode_post_setxattr(dentry, name, value, size); } int security_inode_getxattr(struct dentry *dentry, const char *name) { if (unlikely(IS_PRIVATE(d_backing_inode(dentry)))) return 0; return call_int_hook(inode_getxattr, 0, dentry, name); } int security_inode_listxattr(struct dentry *dentry) { if (unlikely(IS_PRIVATE(d_backing_inode(dentry)))) return 0; return call_int_hook(inode_listxattr, 0, dentry); } int security_inode_removexattr(struct dentry *dentry, const char *name) { int ret; if (unlikely(IS_PRIVATE(d_backing_inode(dentry)))) return 0; /* * SELinux and Smack integrate the cap call, * so assume that all LSMs supplying this call do so. */ ret = call_int_hook(inode_removexattr, 1, dentry, name); if (ret == 1) ret = cap_inode_removexattr(dentry, name); if (ret) return ret; ret = ima_inode_removexattr(dentry, name); if (ret) return ret; return evm_inode_removexattr(dentry, name); } int security_inode_need_killpriv(struct dentry *dentry) { return call_int_hook(inode_need_killpriv, 0, dentry); } int security_inode_killpriv(struct dentry *dentry) { return call_int_hook(inode_killpriv, 0, dentry); } int security_inode_getsecurity(struct inode *inode, const char *name, void **buffer, bool alloc) { struct security_hook_list *hp; int rc; if (unlikely(IS_PRIVATE(inode))) return LSM_RET_DEFAULT(inode_getsecurity); /* * Only one module will provide an attribute with a given name. */ hlist_for_each_entry(hp, &security_hook_heads.inode_getsecurity, list) { rc = hp->hook.inode_getsecurity(inode, name, buffer, alloc); if (rc != LSM_RET_DEFAULT(inode_getsecurity)) return rc; } return LSM_RET_DEFAULT(inode_getsecurity); } int security_inode_setsecurity(struct inode *inode, const char *name, const void *value, size_t size, int flags) { struct security_hook_list *hp; int rc; if (unlikely(IS_PRIVATE(inode))) return LSM_RET_DEFAULT(inode_setsecurity); /* * Only one module will provide an attribute with a given name. */ hlist_for_each_entry(hp, &security_hook_heads.inode_setsecurity, list) { rc = hp->hook.inode_setsecurity(inode, name, value, size, flags); if (rc != LSM_RET_DEFAULT(inode_setsecurity)) return rc; } return LSM_RET_DEFAULT(inode_setsecurity); } int security_inode_listsecurity(struct inode *inode, char *buffer, size_t buffer_size) { if (unlikely(IS_PRIVATE(inode))) return 0; return call_int_hook(inode_listsecurity, 0, inode, buffer, buffer_size); } EXPORT_SYMBOL(security_inode_listsecurity); void security_inode_getsecid(struct inode *inode, u32 *secid) { call_void_hook(inode_getsecid, inode, secid); } int security_inode_copy_up(struct dentry *src, struct cred **new) { return call_int_hook(inode_copy_up, 0, src, new); } EXPORT_SYMBOL(security_inode_copy_up); int security_inode_copy_up_xattr(const char *name) { struct security_hook_list *hp; int rc; /* * The implementation can return 0 (accept the xattr), 1 (discard the * xattr), -EOPNOTSUPP if it does not know anything about the xattr or * any other error code incase of an error. */ hlist_for_each_entry(hp, &security_hook_heads.inode_copy_up_xattr, list) { rc = hp->hook.inode_copy_up_xattr(name); if (rc != LSM_RET_DEFAULT(inode_copy_up_xattr)) return rc; } return LSM_RET_DEFAULT(inode_copy_up_xattr); } EXPORT_SYMBOL(security_inode_copy_up_xattr); int security_kernfs_init_security(struct kernfs_node *kn_dir, struct kernfs_node *kn) { return call_int_hook(kernfs_init_security, 0, kn_dir, kn); } int security_file_permission(struct file *file, int mask) { int ret; ret = call_int_hook(file_permission, 0, file, mask); if (ret) return ret; return fsnotify_perm(file, mask); } int security_file_alloc(struct file *file) { int rc = lsm_file_alloc(file); if (rc) return rc; rc = call_int_hook(file_alloc_security, 0, file); if (unlikely(rc)) security_file_free(file); return rc; } void security_file_free(struct file *file) { void *blob; call_void_hook(file_free_security, file); blob = file->f_security; if (blob) { file->f_security = NULL; kmem_cache_free(lsm_file_cache, blob); } } int security_file_ioctl(struct file *file, unsigned int cmd, unsigned long arg) { return call_int_hook(file_ioctl, 0, file, cmd, arg); } EXPORT_SYMBOL_GPL(security_file_ioctl); static inline unsigned long mmap_prot(struct file *file, unsigned long prot) { /* * Does we have PROT_READ and does the application expect * it to imply PROT_EXEC? If not, nothing to talk about... */ if ((prot & (PROT_READ | PROT_EXEC)) != PROT_READ) return prot; if (!(current->personality & READ_IMPLIES_EXEC)) return prot; /* * if that's an anonymous mapping, let it. */ if (!file) return prot | PROT_EXEC; /* * ditto if it's not on noexec mount, except that on !MMU we need * NOMMU_MAP_EXEC (== VM_MAYEXEC) in this case */ if (!path_noexec(&file->f_path)) { #ifndef CONFIG_MMU if (file->f_op->mmap_capabilities) { unsigned caps = file->f_op->mmap_capabilities(file); if (!(caps & NOMMU_MAP_EXEC)) return prot; } #endif return prot | PROT_EXEC; } /* anything on noexec mount won't get PROT_EXEC */ return prot; } int security_mmap_file(struct file *file, unsigned long prot, unsigned long flags) { int ret; ret = call_int_hook(mmap_file, 0, file, prot, mmap_prot(file, prot), flags); if (ret) return ret; return ima_file_mmap(file, prot); } int security_mmap_addr(unsigned long addr) { return call_int_hook(mmap_addr, 0, addr); } int security_file_mprotect(struct vm_area_struct *vma, unsigned long reqprot, unsigned long prot) { int ret; ret = call_int_hook(file_mprotect, 0, vma, reqprot, prot); if (ret) return ret; return ima_file_mprotect(vma, prot); } int security_file_lock(struct file *file, unsigned int cmd) { return call_int_hook(file_lock, 0, file, cmd); } int security_file_fcntl(struct file *file, unsigned int cmd, unsigned long arg) { return call_int_hook(file_fcntl, 0, file, cmd, arg); } void security_file_set_fowner(struct file *file) { call_void_hook(file_set_fowner, file); } int security_file_send_sigiotask(struct task_struct *tsk, struct fown_struct *fown, int sig) { return call_int_hook(file_send_sigiotask, 0, tsk, fown, sig); } int security_file_receive(struct file *file) { return call_int_hook(file_receive, 0, file); } int security_file_open(struct file *file) { int ret; ret = call_int_hook(file_open, 0, file); if (ret) return ret; return fsnotify_perm(file, MAY_OPEN); } int security_task_alloc(struct task_struct *task, unsigned long clone_flags) { int rc = lsm_task_alloc(task); if (rc) return rc; rc = call_int_hook(task_alloc, 0, task, clone_flags); if (unlikely(rc)) security_task_free(task); return rc; } void security_task_free(struct task_struct *task) { call_void_hook(task_free, task); kfree(task->security); task->security = NULL; } int security_cred_alloc_blank(struct cred *cred, gfp_t gfp) { int rc = lsm_cred_alloc(cred, gfp); if (rc) return rc; rc = call_int_hook(cred_alloc_blank, 0, cred, gfp); if (unlikely(rc)) security_cred_free(cred); return rc; } void security_cred_free(struct cred *cred) { /* * There is a failure case in prepare_creds() that * may result in a call here with ->security being NULL. */ if (unlikely(cred->security == NULL)) return; call_void_hook(cred_free, cred); kfree(cred->security); cred->security = NULL; } int security_prepare_creds(struct cred *new, const struct cred *old, gfp_t gfp) { int rc = lsm_cred_alloc(new, gfp); if (rc) return rc; rc = call_int_hook(cred_prepare, 0, new, old, gfp); if (unlikely(rc)) security_cred_free(new); return rc; } void security_transfer_creds(struct cred *new, const struct cred *old) { call_void_hook(cred_transfer, new, old); } void security_cred_getsecid(const struct cred *c, u32 *secid) { *secid = 0; call_void_hook(cred_getsecid, c, secid); } EXPORT_SYMBOL(security_cred_getsecid); int security_kernel_act_as(struct cred *new, u32 secid) { return call_int_hook(kernel_act_as, 0, new, secid); } int security_kernel_create_files_as(struct cred *new, struct inode *inode) { return call_int_hook(kernel_create_files_as, 0, new, inode); } int security_kernel_module_request(char *kmod_name) { int ret; ret = call_int_hook(kernel_module_request, 0, kmod_name); if (ret) return ret; return integrity_kernel_module_request(kmod_name); } int security_kernel_read_file(struct file *file, enum kernel_read_file_id id, bool contents) { int ret; ret = call_int_hook(kernel_read_file, 0, file, id, contents); if (ret) return ret; return ima_read_file(file, id, contents); } EXPORT_SYMBOL_GPL(security_kernel_read_file); int security_kernel_post_read_file(struct file *file, char *buf, loff_t size, enum kernel_read_file_id id) { int ret; ret = call_int_hook(kernel_post_read_file, 0, file, buf, size, id); if (ret) return ret; return ima_post_read_file(file, buf, size, id); } EXPORT_SYMBOL_GPL(security_kernel_post_read_file); int security_kernel_load_data(enum kernel_load_data_id id, bool contents) { int ret; ret = call_int_hook(kernel_load_data, 0, id, contents); if (ret) return ret; return ima_load_data(id, contents); } EXPORT_SYMBOL_GPL(security_kernel_load_data); int security_kernel_post_load_data(char *buf, loff_t size, enum kernel_load_data_id id, char *description) { int ret; ret = call_int_hook(kernel_post_load_data, 0, buf, size, id, description); if (ret) return ret; return ima_post_load_data(buf, size, id, description); } EXPORT_SYMBOL_GPL(security_kernel_post_load_data); int security_task_fix_setuid(struct cred *new, const struct cred *old, int flags) { return call_int_hook(task_fix_setuid, 0, new, old, flags); } int security_task_fix_setgid(struct cred *new, const struct cred *old, int flags) { return call_int_hook(task_fix_setgid, 0, new, old, flags); } int security_task_setpgid(struct task_struct *p, pid_t pgid) { return call_int_hook(task_setpgid, 0, p, pgid); } int security_task_getpgid(struct task_struct *p) { return call_int_hook(task_getpgid, 0, p); } int security_task_getsid(struct task_struct *p) { return call_int_hook(task_getsid, 0, p); } void security_task_getsecid(struct task_struct *p, u32 *secid) { *secid = 0; call_void_hook(task_getsecid, p, secid); } EXPORT_SYMBOL(security_task_getsecid); int security_task_setnice(struct task_struct *p, int nice) { return call_int_hook(task_setnice, 0, p, nice); } int security_task_setioprio(struct task_struct *p, int ioprio) { return call_int_hook(task_setioprio, 0, p, ioprio); } int security_task_getioprio(struct task_struct *p) { return call_int_hook(task_getioprio, 0, p); } int security_task_prlimit(const struct cred *cred, const struct cred *tcred, unsigned int flags) { return call_int_hook(task_prlimit, 0, cred, tcred, flags); } int security_task_setrlimit(struct task_struct *p, unsigned int resource, struct rlimit *new_rlim) { return call_int_hook(task_setrlimit, 0, p, resource, new_rlim); } int security_task_setscheduler(struct task_struct *p) { return call_int_hook(task_setscheduler, 0, p); } int security_task_getscheduler(struct task_struct *p) { return call_int_hook(task_getscheduler, 0, p); } int security_task_movememory(struct task_struct *p) { return call_int_hook(task_movememory, 0, p); } int security_task_kill(struct task_struct *p, struct kernel_siginfo *info, int sig, const struct cred *cred) { return call_int_hook(task_kill, 0, p, info, sig, cred); } int security_task_prctl(int option, unsigned long arg2, unsigned long arg3, unsigned long arg4, unsigned long arg5) { int thisrc; int rc = LSM_RET_DEFAULT(task_prctl); struct security_hook_list *hp; hlist_for_each_entry(hp, &security_hook_heads.task_prctl, list) { thisrc = hp->hook.task_prctl(option, arg2, arg3, arg4, arg5); if (thisrc != LSM_RET_DEFAULT(task_prctl)) { rc = thisrc; if (thisrc != 0) break; } } return rc; } void security_task_to_inode(struct task_struct *p, struct inode *inode) { call_void_hook(task_to_inode, p, inode); } int security_ipc_permission(struct kern_ipc_perm *ipcp, short flag) { return call_int_hook(ipc_permission, 0, ipcp, flag); } void security_ipc_getsecid(struct kern_ipc_perm *ipcp, u32 *secid) { *secid = 0; call_void_hook(ipc_getsecid, ipcp, secid); } int security_msg_msg_alloc(struct msg_msg *msg) { int rc = lsm_msg_msg_alloc(msg); if (unlikely(rc)) return rc; rc = call_int_hook(msg_msg_alloc_security, 0, msg); if (unlikely(rc)) security_msg_msg_free(msg); return rc; } void security_msg_msg_free(struct msg_msg *msg) { call_void_hook(msg_msg_free_security, msg); kfree(msg->security); msg->security = NULL; } int security_msg_queue_alloc(struct kern_ipc_perm *msq) { int rc = lsm_ipc_alloc(msq); if (unlikely(rc)) return rc; rc = call_int_hook(msg_queue_alloc_security, 0, msq); if (unlikely(rc)) security_msg_queue_free(msq); return rc; } void security_msg_queue_free(struct kern_ipc_perm *msq) { call_void_hook(msg_queue_free_security, msq); kfree(msq->security); msq->security = NULL; } int security_msg_queue_associate(struct kern_ipc_perm *msq, int msqflg) { return call_int_hook(msg_queue_associate, 0, msq, msqflg); } int security_msg_queue_msgctl(struct kern_ipc_perm *msq, int cmd) { return call_int_hook(msg_queue_msgctl, 0, msq, cmd); } int security_msg_queue_msgsnd(struct kern_ipc_perm *msq, struct msg_msg *msg, int msqflg) { return call_int_hook(msg_queue_msgsnd, 0, msq, msg, msqflg); } int security_msg_queue_msgrcv(struct kern_ipc_perm *msq, struct msg_msg *msg, struct task_struct *target, long type, int mode) { return call_int_hook(msg_queue_msgrcv, 0, msq, msg, target, type, mode); } int security_shm_alloc(struct kern_ipc_perm *shp) { int rc = lsm_ipc_alloc(shp); if (unlikely(rc)) return rc; rc = call_int_hook(shm_alloc_security, 0, shp); if (unlikely(rc)) security_shm_free(shp); return rc; } void security_shm_free(struct kern_ipc_perm *shp) { call_void_hook(shm_free_security, shp); kfree(shp->security); shp->security = NULL; } int security_shm_associate(struct kern_ipc_perm *shp, int shmflg) { return call_int_hook(shm_associate, 0, shp, shmflg); } int security_shm_shmctl(struct kern_ipc_perm *shp, int cmd) { return call_int_hook(shm_shmctl, 0, shp, cmd); } int security_shm_shmat(struct kern_ipc_perm *shp, char __user *shmaddr, int shmflg) { return call_int_hook(shm_shmat, 0, shp, shmaddr, shmflg); } int security_sem_alloc(struct kern_ipc_perm *sma) { int rc = lsm_ipc_alloc(sma); if (unlikely(rc)) return rc; rc = call_int_hook(sem_alloc_security, 0, sma); if (unlikely(rc)) security_sem_free(sma); return rc; } void security_sem_free(struct kern_ipc_perm *sma) { call_void_hook(sem_free_security, sma); kfree(sma->security); sma->security = NULL; } int security_sem_associate(struct kern_ipc_perm *sma, int semflg) { return call_int_hook(sem_associate, 0, sma, semflg); } int security_sem_semctl(struct kern_ipc_perm *sma, int cmd) { return call_int_hook(sem_semctl, 0, sma, cmd); } int security_sem_semop(struct kern_ipc_perm *sma, struct sembuf *sops, unsigned nsops, int alter) { return call_int_hook(sem_semop, 0, sma, sops, nsops, alter); } void security_d_instantiate(struct dentry *dentry, struct inode *inode) { if (unlikely(inode && IS_PRIVATE(inode))) return; call_void_hook(d_instantiate, dentry, inode); } EXPORT_SYMBOL(security_d_instantiate); int security_getprocattr(struct task_struct *p, const char *lsm, char *name, char **value) { struct security_hook_list *hp; hlist_for_each_entry(hp, &security_hook_heads.getprocattr, list) { if (lsm != NULL && strcmp(lsm, hp->lsm)) continue; return hp->hook.getprocattr(p, name, value); } return LSM_RET_DEFAULT(getprocattr); } int security_setprocattr(const char *lsm, const char *name, void *value, size_t size) { struct security_hook_list *hp; hlist_for_each_entry(hp, &security_hook_heads.setprocattr, list) { if (lsm != NULL && strcmp(lsm, hp->lsm)) continue; return hp->hook.setprocattr(name, value, size); } return LSM_RET_DEFAULT(setprocattr); } int security_netlink_send(struct sock *sk, struct sk_buff *skb) { return call_int_hook(netlink_send, 0, sk, skb); } int security_ismaclabel(const char *name) { return call_int_hook(ismaclabel, 0, name); } EXPORT_SYMBOL(security_ismaclabel); int security_secid_to_secctx(u32 secid, char **secdata, u32 *seclen) { struct security_hook_list *hp; int rc; /* * Currently, only one LSM can implement secid_to_secctx (i.e this * LSM hook is not "stackable"). */ hlist_for_each_entry(hp, &security_hook_heads.secid_to_secctx, list) { rc = hp->hook.secid_to_secctx(secid, secdata, seclen); if (rc != LSM_RET_DEFAULT(secid_to_secctx)) return rc; } return LSM_RET_DEFAULT(secid_to_secctx); } EXPORT_SYMBOL(security_secid_to_secctx); int security_secctx_to_secid(const char *secdata, u32 seclen, u32 *secid) { *secid = 0; return call_int_hook(secctx_to_secid, 0, secdata, seclen, secid); } EXPORT_SYMBOL(security_secctx_to_secid); void security_release_secctx(char *secdata, u32 seclen) { call_void_hook(release_secctx, secdata, seclen); } EXPORT_SYMBOL(security_release_secctx); void security_inode_invalidate_secctx(struct inode *inode) { call_void_hook(inode_invalidate_secctx, inode); } EXPORT_SYMBOL(security_inode_invalidate_secctx); int security_inode_notifysecctx(struct inode *inode, void *ctx, u32 ctxlen) { return call_int_hook(inode_notifysecctx, 0, inode, ctx, ctxlen); } EXPORT_SYMBOL(security_inode_notifysecctx); int security_inode_setsecctx(struct dentry *dentry, void *ctx, u32 ctxlen) { return call_int_hook(inode_setsecctx, 0, dentry, ctx, ctxlen); } EXPORT_SYMBOL(security_inode_setsecctx); int security_inode_getsecctx(struct inode *inode, void **ctx, u32 *ctxlen) { return call_int_hook(inode_getsecctx, -EOPNOTSUPP, inode, ctx, ctxlen); } EXPORT_SYMBOL(security_inode_getsecctx); #ifdef CONFIG_WATCH_QUEUE int security_post_notification(const struct cred *w_cred, const struct cred *cred, struct watch_notification *n) { return call_int_hook(post_notification, 0, w_cred, cred, n); } #endif /* CONFIG_WATCH_QUEUE */ #ifdef CONFIG_KEY_NOTIFICATIONS int security_watch_key(struct key *key) { return call_int_hook(watch_key, 0, key); } #endif #ifdef CONFIG_SECURITY_NETWORK int security_unix_stream_connect(struct sock *sock, struct sock *other, struct sock *newsk) { return call_int_hook(unix_stream_connect, 0, sock, other, newsk); } EXPORT_SYMBOL(security_unix_stream_connect); int security_unix_may_send(struct socket *sock, struct socket *other) { return call_int_hook(unix_may_send, 0, sock, other); } EXPORT_SYMBOL(security_unix_may_send); int security_socket_create(int family, int type, int protocol, int kern) { return call_int_hook(socket_create, 0, family, type, protocol, kern); } int security_socket_post_create(struct socket *sock, int family, int type, int protocol, int kern) { return call_int_hook(socket_post_create, 0, sock, family, type, protocol, kern); } int security_socket_socketpair(struct socket *socka, struct socket *sockb) { return call_int_hook(socket_socketpair, 0, socka, sockb); } EXPORT_SYMBOL(security_socket_socketpair); int security_socket_bind(struct socket *sock, struct sockaddr *address, int addrlen) { return call_int_hook(socket_bind, 0, sock, address, addrlen); } int security_socket_connect(struct socket *sock, struct sockaddr *address, int addrlen) { return call_int_hook(socket_connect, 0, sock, address, addrlen); } int security_socket_listen(struct socket *sock, int backlog) { return call_int_hook(socket_listen, 0, sock, backlog); } int security_socket_accept(struct socket *sock, struct socket *newsock) { return call_int_hook(socket_accept, 0, sock, newsock); } int security_socket_sendmsg(struct socket *sock, struct msghdr *msg, int size) { return call_int_hook(socket_sendmsg, 0, sock, msg, size); } int security_socket_recvmsg(struct socket *sock, struct msghdr *msg, int size, int flags) { return call_int_hook(socket_recvmsg, 0, sock, msg, size, flags); } int security_socket_getsockname(struct socket *sock) { return call_int_hook(socket_getsockname, 0, sock); } int security_socket_getpeername(struct socket *sock) { return call_int_hook(socket_getpeername, 0, sock); } int security_socket_getsockopt(struct socket *sock, int level, int optname) { return call_int_hook(socket_getsockopt, 0, sock, level, optname); } int security_socket_setsockopt(struct socket *sock, int level, int optname) { return call_int_hook(socket_setsockopt, 0, sock, level, optname); } int security_socket_shutdown(struct socket *sock, int how) { return call_int_hook(socket_shutdown, 0, sock, how); } int security_sock_rcv_skb(struct sock *sk, struct sk_buff *skb) { return call_int_hook(socket_sock_rcv_skb, 0, sk, skb); } EXPORT_SYMBOL(security_sock_rcv_skb); int security_socket_getpeersec_stream(struct socket *sock, char __user *optval, int __user *optlen, unsigned len) { return call_int_hook(socket_getpeersec_stream, -ENOPROTOOPT, sock, optval, optlen, len); } int security_socket_getpeersec_dgram(struct socket *sock, struct sk_buff *skb, u32 *secid) { return call_int_hook(socket_getpeersec_dgram, -ENOPROTOOPT, sock, skb, secid); } EXPORT_SYMBOL(security_socket_getpeersec_dgram); int security_sk_alloc(struct sock *sk, int family, gfp_t priority) { return call_int_hook(sk_alloc_security, 0, sk, family, priority); } void security_sk_free(struct sock *sk) { call_void_hook(sk_free_security, sk); } void security_sk_clone(const struct sock *sk, struct sock *newsk) { call_void_hook(sk_clone_security, sk, newsk); } EXPORT_SYMBOL(security_sk_clone); void security_sk_classify_flow(struct sock *sk, struct flowi *fl) { call_void_hook(sk_getsecid, sk, &fl->flowi_secid); } EXPORT_SYMBOL(security_sk_classify_flow); void security_req_classify_flow(const struct request_sock *req, struct flowi *fl) { call_void_hook(req_classify_flow, req, fl); } EXPORT_SYMBOL(security_req_classify_flow); void security_sock_graft(struct sock *sk, struct socket *parent) { call_void_hook(sock_graft, sk, parent); } EXPORT_SYMBOL(security_sock_graft); int security_inet_conn_request(struct sock *sk, struct sk_buff *skb, struct request_sock *req) { return call_int_hook(inet_conn_request, 0, sk, skb, req); } EXPORT_SYMBOL(security_inet_conn_request); void security_inet_csk_clone(struct sock *newsk, const struct request_sock *req) { call_void_hook(inet_csk_clone, newsk, req); } void security_inet_conn_established(struct sock *sk, struct sk_buff *skb) { call_void_hook(inet_conn_established, sk, skb); } EXPORT_SYMBOL(security_inet_conn_established); int security_secmark_relabel_packet(u32 secid) { return call_int_hook(secmark_relabel_packet, 0, secid); } EXPORT_SYMBOL(security_secmark_relabel_packet); void security_secmark_refcount_inc(void) { call_void_hook(secmark_refcount_inc); } EXPORT_SYMBOL(security_secmark_refcount_inc); void security_secmark_refcount_dec(void) { call_void_hook(secmark_refcount_dec); } EXPORT_SYMBOL(security_secmark_refcount_dec); int security_tun_dev_alloc_security(void **security) { return call_int_hook(tun_dev_alloc_security, 0, security); } EXPORT_SYMBOL(security_tun_dev_alloc_security); void security_tun_dev_free_security(void *security) { call_void_hook(tun_dev_free_security, security); } EXPORT_SYMBOL(security_tun_dev_free_security); int security_tun_dev_create(void) { return call_int_hook(tun_dev_create, 0); } EXPORT_SYMBOL(security_tun_dev_create); int security_tun_dev_attach_queue(void *security) { return call_int_hook(tun_dev_attach_queue, 0, security); } EXPORT_SYMBOL(security_tun_dev_attach_queue); int security_tun_dev_attach(struct sock *sk, void *security) { return call_int_hook(tun_dev_attach, 0, sk, security); } EXPORT_SYMBOL(security_tun_dev_attach); int security_tun_dev_open(void *security) { return call_int_hook(tun_dev_open, 0, security); } EXPORT_SYMBOL(security_tun_dev_open); int security_sctp_assoc_request(struct sctp_endpoint *ep, struct sk_buff *skb) { return call_int_hook(sctp_assoc_request, 0, ep, skb); } EXPORT_SYMBOL(security_sctp_assoc_request); int security_sctp_bind_connect(struct sock *sk, int optname, struct sockaddr *address, int addrlen) { return call_int_hook(sctp_bind_connect, 0, sk, optname, address, addrlen); } EXPORT_SYMBOL(security_sctp_bind_connect); void security_sctp_sk_clone(struct sctp_endpoint *ep, struct sock *sk, struct sock *newsk) { call_void_hook(sctp_sk_clone, ep, sk, newsk); } EXPORT_SYMBOL(security_sctp_sk_clone); #endif /* CONFIG_SECURITY_NETWORK */ #ifdef CONFIG_SECURITY_INFINIBAND int security_ib_pkey_access(void *sec, u64 subnet_prefix, u16 pkey) { return call_int_hook(ib_pkey_access, 0, sec, subnet_prefix, pkey); } EXPORT_SYMBOL(security_ib_pkey_access); int security_ib_endport_manage_subnet(void *sec, const char *dev_name, u8 port_num) { return call_int_hook(ib_endport_manage_subnet, 0, sec, dev_name, port_num); } EXPORT_SYMBOL(security_ib_endport_manage_subnet); int security_ib_alloc_security(void **sec) { return call_int_hook(ib_alloc_security, 0, sec); } EXPORT_SYMBOL(security_ib_alloc_security); void security_ib_free_security(void *sec) { call_void_hook(ib_free_security, sec); } EXPORT_SYMBOL(security_ib_free_security); #endif /* CONFIG_SECURITY_INFINIBAND */ #ifdef CONFIG_SECURITY_NETWORK_XFRM int security_xfrm_policy_alloc(struct xfrm_sec_ctx **ctxp, struct xfrm_user_sec_ctx *sec_ctx, gfp_t gfp) { return call_int_hook(xfrm_policy_alloc_security, 0, ctxp, sec_ctx, gfp); } EXPORT_SYMBOL(security_xfrm_policy_alloc); int security_xfrm_policy_clone(struct xfrm_sec_ctx *old_ctx, struct xfrm_sec_ctx **new_ctxp) { return call_int_hook(xfrm_policy_clone_security, 0, old_ctx, new_ctxp); } void security_xfrm_policy_free(struct xfrm_sec_ctx *ctx) { call_void_hook(xfrm_policy_free_security, ctx); } EXPORT_SYMBOL(security_xfrm_policy_free); int security_xfrm_policy_delete(struct xfrm_sec_ctx *ctx) { return call_int_hook(xfrm_policy_delete_security, 0, ctx); } int security_xfrm_state_alloc(struct xfrm_state *x, struct xfrm_user_sec_ctx *sec_ctx) { return call_int_hook(xfrm_state_alloc, 0, x, sec_ctx); } EXPORT_SYMBOL(security_xfrm_state_alloc); int security_xfrm_state_alloc_acquire(struct xfrm_state *x, struct xfrm_sec_ctx *polsec, u32 secid) { return call_int_hook(xfrm_state_alloc_acquire, 0, x, polsec, secid); } int security_xfrm_state_delete(struct xfrm_state *x) { return call_int_hook(xfrm_state_delete_security, 0, x); } EXPORT_SYMBOL(security_xfrm_state_delete); void security_xfrm_state_free(struct xfrm_state *x) { call_void_hook(xfrm_state_free_security, x); } int security_xfrm_policy_lookup(struct xfrm_sec_ctx *ctx, u32 fl_secid, u8 dir) { return call_int_hook(xfrm_policy_lookup, 0, ctx, fl_secid, dir); } int security_xfrm_state_pol_flow_match(struct xfrm_state *x, struct xfrm_policy *xp, const struct flowi *fl) { struct security_hook_list *hp; int rc = LSM_RET_DEFAULT(xfrm_state_pol_flow_match); /* * Since this function is expected to return 0 or 1, the judgment * becomes difficult if multiple LSMs supply this call. Fortunately, * we can use the first LSM's judgment because currently only SELinux * supplies this call. * * For speed optimization, we explicitly break the loop rather than * using the macro */ hlist_for_each_entry(hp, &security_hook_heads.xfrm_state_pol_flow_match, list) { rc = hp->hook.xfrm_state_pol_flow_match(x, xp, fl); break; } return rc; } int security_xfrm_decode_session(struct sk_buff *skb, u32 *secid) { return call_int_hook(xfrm_decode_session, 0, skb, secid, 1); } void security_skb_classify_flow(struct sk_buff *skb, struct flowi *fl) { int rc = call_int_hook(xfrm_decode_session, 0, skb, &fl->flowi_secid, 0); BUG_ON(rc); } EXPORT_SYMBOL(security_skb_classify_flow); #endif /* CONFIG_SECURITY_NETWORK_XFRM */ #ifdef CONFIG_KEYS int security_key_alloc(struct key *key, const struct cred *cred, unsigned long flags) { return call_int_hook(key_alloc, 0, key, cred, flags); } void security_key_free(struct key *key) { call_void_hook(key_free, key); } int security_key_permission(key_ref_t key_ref, const struct cred *cred, enum key_need_perm need_perm) { return call_int_hook(key_permission, 0, key_ref, cred, need_perm); } int security_key_getsecurity(struct key *key, char **_buffer) { *_buffer = NULL; return call_int_hook(key_getsecurity, 0, key, _buffer); } #endif /* CONFIG_KEYS */ #ifdef CONFIG_AUDIT int security_audit_rule_init(u32 field, u32 op, char *rulestr, void **lsmrule) { return call_int_hook(audit_rule_init, 0, field, op, rulestr, lsmrule); } int security_audit_rule_known(struct audit_krule *krule) { return call_int_hook(audit_rule_known, 0, krule); } void security_audit_rule_free(void *lsmrule) { call_void_hook(audit_rule_free, lsmrule); } int security_audit_rule_match(u32 secid, u32 field, u32 op, void *lsmrule) { return call_int_hook(audit_rule_match, 0, secid, field, op, lsmrule); } #endif /* CONFIG_AUDIT */ #ifdef CONFIG_BPF_SYSCALL int security_bpf(int cmd, union bpf_attr *attr, unsigned int size) { return call_int_hook(bpf, 0, cmd, attr, size); } int security_bpf_map(struct bpf_map *map, fmode_t fmode) { return call_int_hook(bpf_map, 0, map, fmode); } int security_bpf_prog(struct bpf_prog *prog) { return call_int_hook(bpf_prog, 0, prog); } int security_bpf_map_alloc(struct bpf_map *map) { return call_int_hook(bpf_map_alloc_security, 0, map); } int security_bpf_prog_alloc(struct bpf_prog_aux *aux) { return call_int_hook(bpf_prog_alloc_security, 0, aux); } void security_bpf_map_free(struct bpf_map *map) { call_void_hook(bpf_map_free_security, map); } void security_bpf_prog_free(struct bpf_prog_aux *aux) { call_void_hook(bpf_prog_free_security, aux); } #endif /* CONFIG_BPF_SYSCALL */ int security_locked_down(enum lockdown_reason what) { return call_int_hook(locked_down, 0, what); } EXPORT_SYMBOL(security_locked_down); #ifdef CONFIG_PERF_EVENTS int security_perf_event_open(struct perf_event_attr *attr, int type) { return call_int_hook(perf_event_open, 0, attr, type); } int security_perf_event_alloc(struct perf_event *event) { return call_int_hook(perf_event_alloc, 0, event); } void security_perf_event_free(struct perf_event *event) { call_void_hook(perf_event_free, event); } int security_perf_event_read(struct perf_event *event) { return call_int_hook(perf_event_read, 0, event); } int security_perf_event_write(struct perf_event *event) { return call_int_hook(perf_event_write, 0, event); } #endif /* CONFIG_PERF_EVENTS */
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946 947 948 949 950 951 952 953 954 955 956 957 958 959 960 961 962 963 964 965 966 967 968 969 970 971 972 973 974 975 976 977 978 979 980 981 982 983 984 985 986 987 988 989 990 991 992 993 994 995 996 997 998 999 1000 1001 1002 1003 1004 1005 1006 1007 1008 1009 1010 1011 1012 1013 1014 1015 1016 1017 1018 1019 1020 1021 1022 1023 1024 1025 1026 1027 1028 1029 1030 1031 1032 1033 1034 1035 1036 1037 1038 1039 1040 1041 1042 1043 1044 1045 1046 1047 1048 1049 1050 1051 1052 1053 1054 1055 1056 1057 1058 1059 1060 1061 1062 1063 1064 1065 1066 1067 1068 1069 1070 1071 1072 1073 1074 1075 1076 1077 1078 1079 1080 1081 1082 1083 1084 1085 1086 1087 1088 1089 1090 1091 1092 1093 1094 1095 1096 1097 1098 1099 1100 1101 1102 1103 1104 1105 1106 1107 1108 1109 1110 1111 1112 1113 1114 1115 1116 1117 1118 1119 1120 1121 1122 1123 1124 1125 1126 1127 1128 1129 1130 1131 1132 1133 1134 1135 1136 1137 1138 1139 1140 1141 1142 1143 1144 1145 1146 1147 1148 1149 1150 1151 1152 1153 1154 1155 1156 1157 1158 1159 1160 1161 1162 1163 1164 1165 1166 1167 1168 1169 1170 1171 1172 1173 1174 1175 1176 1177 1178 1179 1180 1181 1182 1183 1184 1185 1186 1187 1188 1189 1190 1191 1192 1193 1194 1195 1196 1197 1198 1199 1200 1201 1202 1203 1204 1205 1206 1207 1208 1209 1210 1211 1212 1213 1214 1215 1216 1217 1218 1219 1220 1221 1222 1223 1224 1225 1226 1227 1228 1229 1230 1231 1232 1233 1234 1235 1236 1237 1238 1239 1240 1241 1242 1243 1244 1245 1246 1247 1248 1249 1250 1251 1252 1253 1254 1255 1256 1257 1258 1259 1260 1261 1262 1263 1264 1265 1266 1267 1268 1269 1270 1271 1272 1273 1274 1275 1276 1277 1278 1279 1280 1281 1282 1283 1284 1285 1286 1287 1288 1289 1290 1291 1292 1293 1294 1295 1296 1297 1298 1299 1300 1301 1302 1303 1304 1305 1306 1307 1308 1309 1310 1311 1312 1313 1314 1315 1316 1317 1318 1319 1320 1321 1322 1323 1324 1325 1326 1327 1328 1329 1330 1331 1332 1333 1334 1335 1336 1337 1338 1339 1340 1341 1342 1343 1344 1345 1346 1347 1348 1349 1350 1351 1352 1353 1354 1355 1356 1357 1358 1359 1360 1361 1362 1363 1364 1365 1366 1367 1368 1369 1370 1371 1372 1373 1374 1375 1376 1377 1378 1379 1380 1381 1382 1383 1384 1385 1386 1387 1388 1389 1390 1391 1392 1393 1394 1395 1396 1397 1398 1399 1400 1401 1402 1403 1404 1405 1406 1407 1408 1409 1410 1411 1412 1413 1414 1415 1416 1417 1418 1419 1420 1421 1422 1423 1424 1425 1426 1427 1428 1429 1430 1431 1432 1433 1434 1435 1436 1437 1438 1439 1440 1441 1442 1443 1444 1445 1446 1447 1448 1449 1450 1451 1452 1453 1454 1455 1456 1457 1458 1459 1460 1461 1462 1463 1464 1465 1466 1467 1468 1469 1470 1471 1472 1473 1474 1475 1476 1477 1478 1479 1480 1481 1482 1483 1484 1485 1486 1487 1488 1489 1490 1491 1492 1493 1494 1495 1496 1497 1498 1499 1500 1501 1502 1503 1504 1505 1506 1507 1508 1509 1510 1511 1512 1513 1514 1515 1516 1517 1518 1519 1520 1521 1522 1523 1524 1525 1526 1527 1528 1529 1530 1531 1532 1533 1534 1535 1536 1537 1538 1539 1540 1541 1542 1543 1544 1545 1546 1547 1548 1549 1550 1551 1552 1553 1554 1555 1556 1557 1558 1559 1560 1561 1562 1563 1564 1565 1566 1567 1568 1569 1570 1571 1572 1573 1574 1575 1576 1577 1578 1579 1580 1581 1582 1583 1584 1585 1586 1587 1588 1589 1590 1591 1592 1593 1594 1595 1596 1597 1598 1599 1600 1601 1602 1603 1604 1605 1606 1607 1608 1609 1610 1611 1612 1613 1614 1615 1616 1617 1618 1619 1620 1621 1622 1623 1624 1625 1626 1627 1628 1629 1630 1631 1632 1633 1634 1635 1636 1637 1638 1639 1640 1641 1642 1643 1644 1645 1646 1647 1648 1649 1650 1651 1652 1653 1654 1655 1656 1657 1658 1659 1660 1661 1662 1663 1664 1665 1666 1667 1668 1669 1670 1671 1672 1673 1674 1675 1676 1677 1678 1679 1680 1681 1682 1683 1684 1685 1686 1687 1688 1689 1690 1691 1692 1693 1694 1695 1696 1697 1698 1699 1700 1701 1702 1703 1704 1705 1706 1707 1708 1709 1710 1711 1712 1713 1714 1715 1716 1717 1718 1719 1720 1721 1722 1723 1724 1725 1726 1727 1728 1729 1730 1731 1732 1733 1734 1735 1736 1737 1738 1739 1740 1741 1742 1743 1744 1745 1746 1747 1748 1749 1750 1751 1752 1753 1754 1755 1756 1757 1758 1759 1760 1761 1762 1763 1764 1765 1766 1767 1768 1769 1770 1771 1772 1773 1774 1775 1776 1777 1778 1779 1780 1781 1782 1783 1784 1785 1786 1787 1788 1789 1790 1791 1792 1793 1794 1795 1796 1797 1798 1799 1800 1801 1802 1803 1804 1805 1806 1807 1808 1809 1810 1811 1812 1813 1814 1815 1816 1817 1818 1819 1820 1821 1822 1823 1824 1825 1826 1827 1828 1829 1830 1831 1832 1833 1834 1835 1836 1837 1838 1839 1840 1841 1842 1843 1844 1845 1846 1847 1848 1849 1850 1851 1852 1853 1854 1855 1856 1857 1858 1859 1860 1861 1862 1863 1864 1865 1866 1867 1868 1869 1870 1871 1872 1873 1874 1875 1876 1877 1878 1879 1880 1881 1882 1883 1884 1885 1886 1887 1888 1889 1890 1891 1892 1893 1894 1895 1896 1897 1898 1899 1900 1901 1902 1903 1904 1905 1906 1907 1908 1909 1910 1911 1912 1913 1914 1915 1916 1917 1918 1919 1920 1921 1922 1923 1924 1925 1926 1927 1928 1929 1930 1931 1932 1933 1934 1935 1936 1937 1938 1939 1940 1941 1942 1943 1944 1945 1946 1947 1948 1949 1950 1951 1952 1953 1954 1955 1956 1957 1958 1959 1960 1961 1962 1963 1964 1965 1966 1967 1968 1969 1970 1971 1972 1973 1974 1975 1976 1977 1978 1979 1980 1981 1982 1983 1984 1985 1986 1987 1988 1989 1990 1991 1992 1993 1994 1995 1996 1997 1998 1999 2000 2001 2002 2003 2004 2005 2006 2007 2008 2009 2010 2011 2012 2013 2014 2015 2016 2017 2018 2019 2020 2021 2022 2023 2024 2025 2026 2027 2028 2029 2030 2031 2032 2033 2034 2035 2036 2037 2038 2039 2040 2041 2042 2043 2044 2045 2046 2047 2048 2049 2050 2051 2052 2053 2054 /* SPDX-License-Identifier: GPL-2.0-or-later */ /* * Copyright 2003-2005 Red Hat, Inc. All rights reserved. * Copyright 2003-2005 Jeff Garzik * * libata documentation is available via 'make {ps|pdf}docs', * as Documentation/driver-api/libata.rst */ #ifndef __LINUX_LIBATA_H__ #define __LINUX_LIBATA_H__ #include <linux/delay.h> #include <linux/jiffies.h> #include <linux/interrupt.h> #include <linux/dma-mapping.h> #include <linux/scatterlist.h> #include <linux/io.h> #include <linux/ata.h> #include <linux/workqueue.h> #include <scsi/scsi_host.h> #include <linux/acpi.h> #include <linux/cdrom.h> #include <linux/sched.h> #include <linux/async.h> /* * Define if arch has non-standard setup. This is a _PCI_ standard * not a legacy or ISA standard. */ #ifdef CONFIG_ATA_NONSTANDARD #include <asm/libata-portmap.h> #else #define ATA_PRIMARY_IRQ(dev) 14 #define ATA_SECONDARY_IRQ(dev) 15 #endif /* * compile-time options: to be removed as soon as all the drivers are * converted to the new debugging mechanism */ #undef ATA_DEBUG /* debugging output */ #undef ATA_VERBOSE_DEBUG /* yet more debugging output */ #undef ATA_IRQ_TRAP /* define to ack screaming irqs */ #undef ATA_NDEBUG /* define to disable quick runtime checks */ /* note: prints function name for you */ #ifdef ATA_DEBUG #define DPRINTK(fmt, args...) printk(KERN_ERR "%s: " fmt, __func__, ## args) #ifdef ATA_VERBOSE_DEBUG #define VPRINTK(fmt, args...) printk(KERN_ERR "%s: " fmt, __func__, ## args) #else #define VPRINTK(fmt, args...) #endif /* ATA_VERBOSE_DEBUG */ #else #define DPRINTK(fmt, args...) #define VPRINTK(fmt, args...) #endif /* ATA_DEBUG */ #define ata_print_version_once(dev, version) \ ({ \ static bool __print_once; \ \ if (!__print_once) { \ __print_once = true; \ ata_print_version(dev, version); \ } \ }) /* NEW: debug levels */ #define HAVE_LIBATA_MSG 1 enum { ATA_MSG_DRV = 0x0001, ATA_MSG_INFO = 0x0002, ATA_MSG_PROBE = 0x0004, ATA_MSG_WARN = 0x0008, ATA_MSG_MALLOC = 0x0010, ATA_MSG_CTL = 0x0020, ATA_MSG_INTR = 0x0040, ATA_MSG_ERR = 0x0080, }; #define ata_msg_drv(p) ((p)->msg_enable & ATA_MSG_DRV) #define ata_msg_info(p) ((p)->msg_enable & ATA_MSG_INFO) #define ata_msg_probe(p) ((p)->msg_enable & ATA_MSG_PROBE) #define ata_msg_warn(p) ((p)->msg_enable & ATA_MSG_WARN) #define ata_msg_malloc(p) ((p)->msg_enable & ATA_MSG_MALLOC) #define ata_msg_ctl(p) ((p)->msg_enable & ATA_MSG_CTL) #define ata_msg_intr(p) ((p)->msg_enable & ATA_MSG_INTR) #define ata_msg_err(p) ((p)->msg_enable & ATA_MSG_ERR) static inline u32 ata_msg_init(int dval, int default_msg_enable_bits) { if (dval < 0 || dval >= (sizeof(u32) * 8)) return default_msg_enable_bits; /* should be 0x1 - only driver info msgs */ if (!dval) return 0; return (1 << dval) - 1; } /* defines only for the constants which don't work well as enums */ #define ATA_TAG_POISON 0xfafbfcfdU enum { /* various global constants */ LIBATA_MAX_PRD = ATA_MAX_PRD / 2, LIBATA_DUMB_MAX_PRD = ATA_MAX_PRD / 4, /* Worst case */ ATA_DEF_QUEUE = 1, ATA_MAX_QUEUE = 32, ATA_TAG_INTERNAL = ATA_MAX_QUEUE, ATA_SHORT_PAUSE = 16, ATAPI_MAX_DRAIN = 16 << 10, ATA_ALL_DEVICES = (1 << ATA_MAX_DEVICES) - 1, ATA_SHT_EMULATED = 1, ATA_SHT_THIS_ID = -1, /* struct ata_taskfile flags */ ATA_TFLAG_LBA48 = (1 << 0), /* enable 48-bit LBA and "HOB" */ ATA_TFLAG_ISADDR = (1 << 1), /* enable r/w to nsect/lba regs */ ATA_TFLAG_DEVICE = (1 << 2), /* enable r/w to device reg */ ATA_TFLAG_WRITE = (1 << 3), /* data dir: host->dev==1 (write) */ ATA_TFLAG_LBA = (1 << 4), /* enable LBA */ ATA_TFLAG_FUA = (1 << 5), /* enable FUA */ ATA_TFLAG_POLLING = (1 << 6), /* set nIEN to 1 and use polling */ /* struct ata_device stuff */ ATA_DFLAG_LBA = (1 << 0), /* device supports LBA */ ATA_DFLAG_LBA48 = (1 << 1), /* device supports LBA48 */ ATA_DFLAG_CDB_INTR = (1 << 2), /* device asserts INTRQ when ready for CDB */ ATA_DFLAG_NCQ = (1 << 3), /* device supports NCQ */ ATA_DFLAG_FLUSH_EXT = (1 << 4), /* do FLUSH_EXT instead of FLUSH */ ATA_DFLAG_ACPI_PENDING = (1 << 5), /* ACPI resume action pending */ ATA_DFLAG_ACPI_FAILED = (1 << 6), /* ACPI on devcfg has failed */ ATA_DFLAG_AN = (1 << 7), /* AN configured */ ATA_DFLAG_TRUSTED = (1 << 8), /* device supports trusted send/recv */ ATA_DFLAG_DMADIR = (1 << 10), /* device requires DMADIR */ ATA_DFLAG_CFG_MASK = (1 << 12) - 1, ATA_DFLAG_PIO = (1 << 12), /* device limited to PIO mode */ ATA_DFLAG_NCQ_OFF = (1 << 13), /* device limited to non-NCQ mode */ ATA_DFLAG_SLEEPING = (1 << 15), /* device is sleeping */ ATA_DFLAG_DUBIOUS_XFER = (1 << 16), /* data transfer not verified */ ATA_DFLAG_NO_UNLOAD = (1 << 17), /* device doesn't support unload */ ATA_DFLAG_UNLOCK_HPA = (1 << 18), /* unlock HPA */ ATA_DFLAG_NCQ_SEND_RECV = (1 << 19), /* device supports NCQ SEND and RECV */ ATA_DFLAG_NCQ_PRIO = (1 << 20), /* device supports NCQ priority */ ATA_DFLAG_NCQ_PRIO_ENABLE = (1 << 21), /* Priority cmds sent to dev */ ATA_DFLAG_INIT_MASK = (1 << 24) - 1, ATA_DFLAG_DETACH = (1 << 24), ATA_DFLAG_DETACHED = (1 << 25), ATA_DFLAG_DA = (1 << 26), /* device supports Device Attention */ ATA_DFLAG_DEVSLP = (1 << 27), /* device supports Device Sleep */ ATA_DFLAG_ACPI_DISABLED = (1 << 28), /* ACPI for the device is disabled */ ATA_DFLAG_D_SENSE = (1 << 29), /* Descriptor sense requested */ ATA_DFLAG_ZAC = (1 << 30), /* ZAC device */ ATA_DEV_UNKNOWN = 0, /* unknown device */ ATA_DEV_ATA = 1, /* ATA device */ ATA_DEV_ATA_UNSUP = 2, /* ATA device (unsupported) */ ATA_DEV_ATAPI = 3, /* ATAPI device */ ATA_DEV_ATAPI_UNSUP = 4, /* ATAPI device (unsupported) */ ATA_DEV_PMP = 5, /* SATA port multiplier */ ATA_DEV_PMP_UNSUP = 6, /* SATA port multiplier (unsupported) */ ATA_DEV_SEMB = 7, /* SEMB */ ATA_DEV_SEMB_UNSUP = 8, /* SEMB (unsupported) */ ATA_DEV_ZAC = 9, /* ZAC device */ ATA_DEV_ZAC_UNSUP = 10, /* ZAC device (unsupported) */ ATA_DEV_NONE = 11, /* no device */ /* struct ata_link flags */ /* NOTE: struct ata_force_param currently stores lflags in u16 */ ATA_LFLAG_NO_HRST = (1 << 1), /* avoid hardreset */ ATA_LFLAG_NO_SRST = (1 << 2), /* avoid softreset */ ATA_LFLAG_ASSUME_ATA = (1 << 3), /* assume ATA class */ ATA_LFLAG_ASSUME_SEMB = (1 << 4), /* assume SEMB class */ ATA_LFLAG_ASSUME_CLASS = ATA_LFLAG_ASSUME_ATA | ATA_LFLAG_ASSUME_SEMB, ATA_LFLAG_NO_RETRY = (1 << 5), /* don't retry this link */ ATA_LFLAG_DISABLED = (1 << 6), /* link is disabled */ ATA_LFLAG_SW_ACTIVITY = (1 << 7), /* keep activity stats */ ATA_LFLAG_NO_LPM = (1 << 8), /* disable LPM on this link */ ATA_LFLAG_RST_ONCE = (1 << 9), /* limit recovery to one reset */ ATA_LFLAG_CHANGED = (1 << 10), /* LPM state changed on this link */ ATA_LFLAG_NO_DB_DELAY = (1 << 11), /* no debounce delay on link resume */ /* struct ata_port flags */ ATA_FLAG_SLAVE_POSS = (1 << 0), /* host supports slave dev */ /* (doesn't imply presence) */ ATA_FLAG_SATA = (1 << 1), ATA_FLAG_NO_LPM = (1 << 2), /* host not happy with LPM */ ATA_FLAG_NO_LOG_PAGE = (1 << 5), /* do not issue log page read */ ATA_FLAG_NO_ATAPI = (1 << 6), /* No ATAPI support */ ATA_FLAG_PIO_DMA = (1 << 7), /* PIO cmds via DMA */ ATA_FLAG_PIO_LBA48 = (1 << 8), /* Host DMA engine is LBA28 only */ ATA_FLAG_PIO_POLLING = (1 << 9), /* use polling PIO if LLD * doesn't handle PIO interrupts */ ATA_FLAG_NCQ = (1 << 10), /* host supports NCQ */ ATA_FLAG_NO_POWEROFF_SPINDOWN = (1 << 11), /* don't spindown before poweroff */ ATA_FLAG_NO_HIBERNATE_SPINDOWN = (1 << 12), /* don't spindown before hibernation */ ATA_FLAG_DEBUGMSG = (1 << 13), ATA_FLAG_FPDMA_AA = (1 << 14), /* driver supports Auto-Activate */ ATA_FLAG_IGN_SIMPLEX = (1 << 15), /* ignore SIMPLEX */ ATA_FLAG_NO_IORDY = (1 << 16), /* controller lacks iordy */ ATA_FLAG_ACPI_SATA = (1 << 17), /* need native SATA ACPI layout */ ATA_FLAG_AN = (1 << 18), /* controller supports AN */ ATA_FLAG_PMP = (1 << 19), /* controller supports PMP */ ATA_FLAG_FPDMA_AUX = (1 << 20), /* controller supports H2DFIS aux field */ ATA_FLAG_EM = (1 << 21), /* driver supports enclosure * management */ ATA_FLAG_SW_ACTIVITY = (1 << 22), /* driver supports sw activity * led */ ATA_FLAG_NO_DIPM = (1 << 23), /* host not happy with DIPM */ ATA_FLAG_SAS_HOST = (1 << 24), /* SAS host */ /* bits 24:31 of ap->flags are reserved for LLD specific flags */ /* struct ata_port pflags */ ATA_PFLAG_EH_PENDING = (1 << 0), /* EH pending */ ATA_PFLAG_EH_IN_PROGRESS = (1 << 1), /* EH in progress */ ATA_PFLAG_FROZEN = (1 << 2), /* port is frozen */ ATA_PFLAG_RECOVERED = (1 << 3), /* recovery action performed */ ATA_PFLAG_LOADING = (1 << 4), /* boot/loading probe */ ATA_PFLAG_SCSI_HOTPLUG = (1 << 6), /* SCSI hotplug scheduled */ ATA_PFLAG_INITIALIZING = (1 << 7), /* being initialized, don't touch */ ATA_PFLAG_RESETTING = (1 << 8), /* reset in progress */ ATA_PFLAG_UNLOADING = (1 << 9), /* driver is being unloaded */ ATA_PFLAG_UNLOADED = (1 << 10), /* driver is unloaded */ ATA_PFLAG_SUSPENDED = (1 << 17), /* port is suspended (power) */ ATA_PFLAG_PM_PENDING = (1 << 18), /* PM operation pending */ ATA_PFLAG_INIT_GTM_VALID = (1 << 19), /* initial gtm data valid */ ATA_PFLAG_PIO32 = (1 << 20), /* 32bit PIO */ ATA_PFLAG_PIO32CHANGE = (1 << 21), /* 32bit PIO can be turned on/off */ ATA_PFLAG_EXTERNAL = (1 << 22), /* eSATA/external port */ /* struct ata_queued_cmd flags */ ATA_QCFLAG_ACTIVE = (1 << 0), /* cmd not yet ack'd to scsi lyer */ ATA_QCFLAG_DMAMAP = (1 << 1), /* SG table is DMA mapped */ ATA_QCFLAG_IO = (1 << 3), /* standard IO command */ ATA_QCFLAG_RESULT_TF = (1 << 4), /* result TF requested */ ATA_QCFLAG_CLEAR_EXCL = (1 << 5), /* clear excl_link on completion */ ATA_QCFLAG_QUIET = (1 << 6), /* don't report device error */ ATA_QCFLAG_RETRY = (1 << 7), /* retry after failure */ ATA_QCFLAG_FAILED = (1 << 16), /* cmd failed and is owned by EH */ ATA_QCFLAG_SENSE_VALID = (1 << 17), /* sense data valid */ ATA_QCFLAG_EH_SCHEDULED = (1 << 18), /* EH scheduled (obsolete) */ /* host set flags */ ATA_HOST_SIMPLEX = (1 << 0), /* Host is simplex, one DMA channel per host only */ ATA_HOST_STARTED = (1 << 1), /* Host started */ ATA_HOST_PARALLEL_SCAN = (1 << 2), /* Ports on this host can be scanned in parallel */ ATA_HOST_IGNORE_ATA = (1 << 3), /* Ignore ATA devices on this host. */ /* bits 24:31 of host->flags are reserved for LLD specific flags */ /* various lengths of time */ ATA_TMOUT_BOOT = 30000, /* heuristic */ ATA_TMOUT_BOOT_QUICK = 7000, /* heuristic */ ATA_TMOUT_INTERNAL_QUICK = 5000, ATA_TMOUT_MAX_PARK = 30000, /* * GoVault needs 2s and iVDR disk HHD424020F7SV00 800ms. 2s * is too much without parallel probing. Use 2s if parallel * probing is available, 800ms otherwise. */ ATA_TMOUT_FF_WAIT_LONG = 2000, ATA_TMOUT_FF_WAIT = 800, /* Spec mandates to wait for ">= 2ms" before checking status * after reset. We wait 150ms, because that was the magic * delay used for ATAPI devices in Hale Landis's ATADRVR, for * the period of time between when the ATA command register is * written, and then status is checked. Because waiting for * "a while" before checking status is fine, post SRST, we * perform this magic delay here as well. * * Old drivers/ide uses the 2mS rule and then waits for ready. */ ATA_WAIT_AFTER_RESET = 150, /* If PMP is supported, we have to do follow-up SRST. As some * PMPs don't send D2H Reg FIS after hardreset, LLDs are * advised to wait only for the following duration before * doing SRST. */ ATA_TMOUT_PMP_SRST_WAIT = 5000, /* When the LPM policy is set to ATA_LPM_MAX_POWER, there might * be a spurious PHY event, so ignore the first PHY event that * occurs within 10s after the policy change. */ ATA_TMOUT_SPURIOUS_PHY = 10000, /* ATA bus states */ BUS_UNKNOWN = 0, BUS_DMA = 1, BUS_IDLE = 2, BUS_NOINTR = 3, BUS_NODATA = 4, BUS_TIMER = 5, BUS_PIO = 6, BUS_EDD = 7, BUS_IDENTIFY = 8, BUS_PACKET = 9, /* SATA port states */ PORT_UNKNOWN = 0, PORT_ENABLED = 1, PORT_DISABLED = 2, /* encoding various smaller bitmaps into a single * unsigned long bitmap */ ATA_NR_PIO_MODES = 7, ATA_NR_MWDMA_MODES = 5, ATA_NR_UDMA_MODES = 8, ATA_SHIFT_PIO = 0, ATA_SHIFT_MWDMA = ATA_SHIFT_PIO + ATA_NR_PIO_MODES, ATA_SHIFT_UDMA = ATA_SHIFT_MWDMA + ATA_NR_MWDMA_MODES, ATA_SHIFT_PRIO = 6, ATA_PRIO_HIGH = 2, /* size of buffer to pad xfers ending on unaligned boundaries */ ATA_DMA_PAD_SZ = 4, /* ering size */ ATA_ERING_SIZE = 32, /* return values for ->qc_defer */ ATA_DEFER_LINK = 1, ATA_DEFER_PORT = 2, /* desc_len for ata_eh_info and context */ ATA_EH_DESC_LEN = 80, /* reset / recovery action types */ ATA_EH_REVALIDATE = (1 << 0), ATA_EH_SOFTRESET = (1 << 1), /* meaningful only in ->prereset */ ATA_EH_HARDRESET = (1 << 2), /* meaningful only in ->prereset */ ATA_EH_RESET = ATA_EH_SOFTRESET | ATA_EH_HARDRESET, ATA_EH_ENABLE_LINK = (1 << 3), ATA_EH_PARK = (1 << 5), /* unload heads and stop I/O */ ATA_EH_PERDEV_MASK = ATA_EH_REVALIDATE | ATA_EH_PARK, ATA_EH_ALL_ACTIONS = ATA_EH_REVALIDATE | ATA_EH_RESET | ATA_EH_ENABLE_LINK, /* ata_eh_info->flags */ ATA_EHI_HOTPLUGGED = (1 << 0), /* could have been hotplugged */ ATA_EHI_NO_AUTOPSY = (1 << 2), /* no autopsy */ ATA_EHI_QUIET = (1 << 3), /* be quiet */ ATA_EHI_NO_RECOVERY = (1 << 4), /* no recovery */ ATA_EHI_DID_SOFTRESET = (1 << 16), /* already soft-reset this port */ ATA_EHI_DID_HARDRESET = (1 << 17), /* already soft-reset this port */ ATA_EHI_PRINTINFO = (1 << 18), /* print configuration info */ ATA_EHI_SETMODE = (1 << 19), /* configure transfer mode */ ATA_EHI_POST_SETMODE = (1 << 20), /* revalidating after setmode */ ATA_EHI_DID_RESET = ATA_EHI_DID_SOFTRESET | ATA_EHI_DID_HARDRESET, /* mask of flags to transfer *to* the slave link */ ATA_EHI_TO_SLAVE_MASK = ATA_EHI_NO_AUTOPSY | ATA_EHI_QUIET, /* max tries if error condition is still set after ->error_handler */ ATA_EH_MAX_TRIES = 5, /* sometimes resuming a link requires several retries */ ATA_LINK_RESUME_TRIES = 5, /* how hard are we gonna try to probe/recover devices */ ATA_PROBE_MAX_TRIES = 3, ATA_EH_DEV_TRIES = 3, ATA_EH_PMP_TRIES = 5, ATA_EH_PMP_LINK_TRIES = 3, SATA_PMP_RW_TIMEOUT = 3000, /* PMP read/write timeout */ /* This should match the actual table size of * ata_eh_cmd_timeout_table in libata-eh.c. */ ATA_EH_CMD_TIMEOUT_TABLE_SIZE = 6, /* Horkage types. May be set by libata or controller on drives (some horkage may be drive/controller pair dependent */ ATA_HORKAGE_DIAGNOSTIC = (1 << 0), /* Failed boot diag */ ATA_HORKAGE_NODMA = (1 << 1), /* DMA problems */ ATA_HORKAGE_NONCQ = (1 << 2), /* Don't use NCQ */ ATA_HORKAGE_MAX_SEC_128 = (1 << 3), /* Limit max sects to 128 */ ATA_HORKAGE_BROKEN_HPA = (1 << 4), /* Broken HPA */ ATA_HORKAGE_DISABLE = (1 << 5), /* Disable it */ ATA_HORKAGE_HPA_SIZE = (1 << 6), /* native size off by one */ ATA_HORKAGE_IVB = (1 << 8), /* cbl det validity bit bugs */ ATA_HORKAGE_STUCK_ERR = (1 << 9), /* stuck ERR on next PACKET */ ATA_HORKAGE_BRIDGE_OK = (1 << 10), /* no bridge limits */ ATA_HORKAGE_ATAPI_MOD16_DMA = (1 << 11), /* use ATAPI DMA for commands not multiple of 16 bytes */ ATA_HORKAGE_FIRMWARE_WARN = (1 << 12), /* firmware update warning */ ATA_HORKAGE_1_5_GBPS = (1 << 13), /* force 1.5 Gbps */ ATA_HORKAGE_NOSETXFER = (1 << 14), /* skip SETXFER, SATA only */ ATA_HORKAGE_BROKEN_FPDMA_AA = (1 << 15), /* skip AA */ ATA_HORKAGE_DUMP_ID = (1 << 16), /* dump IDENTIFY data */ ATA_HORKAGE_MAX_SEC_LBA48 = (1 << 17), /* Set max sects to 65535 */ ATA_HORKAGE_ATAPI_DMADIR = (1 << 18), /* device requires dmadir */ ATA_HORKAGE_NO_NCQ_TRIM = (1 << 19), /* don't use queued TRIM */ ATA_HORKAGE_NOLPM = (1 << 20), /* don't use LPM */ ATA_HORKAGE_WD_BROKEN_LPM = (1 << 21), /* some WDs have broken LPM */ ATA_HORKAGE_ZERO_AFTER_TRIM = (1 << 22),/* guarantees zero after trim */ ATA_HORKAGE_NO_DMA_LOG = (1 << 23), /* don't use DMA for log read */ ATA_HORKAGE_NOTRIM = (1 << 24), /* don't use TRIM */ ATA_HORKAGE_MAX_SEC_1024 = (1 << 25), /* Limit max sects to 1024 */ ATA_HORKAGE_MAX_TRIM_128M = (1 << 26), /* Limit max trim size to 128M */ ATA_HORKAGE_NO_NCQ_ON_ATI = (1 << 27), /* Disable NCQ on ATI chipset */ /* DMA mask for user DMA control: User visible values; DO NOT renumber */ ATA_DMA_MASK_ATA = (1 << 0), /* DMA on ATA Disk */ ATA_DMA_MASK_ATAPI = (1 << 1), /* DMA on ATAPI */ ATA_DMA_MASK_CFA = (1 << 2), /* DMA on CF Card */ /* ATAPI command types */ ATAPI_READ = 0, /* READs */ ATAPI_WRITE = 1, /* WRITEs */ ATAPI_READ_CD = 2, /* READ CD [MSF] */ ATAPI_PASS_THRU = 3, /* SAT pass-thru */ ATAPI_MISC = 4, /* the rest */ /* Timing constants */ ATA_TIMING_SETUP = (1 << 0), ATA_TIMING_ACT8B = (1 << 1), ATA_TIMING_REC8B = (1 << 2), ATA_TIMING_CYC8B = (1 << 3), ATA_TIMING_8BIT = ATA_TIMING_ACT8B | ATA_TIMING_REC8B | ATA_TIMING_CYC8B, ATA_TIMING_ACTIVE = (1 << 4), ATA_TIMING_RECOVER = (1 << 5), ATA_TIMING_DMACK_HOLD = (1 << 6), ATA_TIMING_CYCLE = (1 << 7), ATA_TIMING_UDMA = (1 << 8), ATA_TIMING_ALL = ATA_TIMING_SETUP | ATA_TIMING_ACT8B | ATA_TIMING_REC8B | ATA_TIMING_CYC8B | ATA_TIMING_ACTIVE | ATA_TIMING_RECOVER | ATA_TIMING_DMACK_HOLD | ATA_TIMING_CYCLE | ATA_TIMING_UDMA, /* ACPI constants */ ATA_ACPI_FILTER_SETXFER = 1 << 0, ATA_ACPI_FILTER_LOCK = 1 << 1, ATA_ACPI_FILTER_DIPM = 1 << 2, ATA_ACPI_FILTER_FPDMA_OFFSET = 1 << 3, /* FPDMA non-zero offset */ ATA_ACPI_FILTER_FPDMA_AA = 1 << 4, /* FPDMA auto activate */ ATA_ACPI_FILTER_DEFAULT = ATA_ACPI_FILTER_SETXFER | ATA_ACPI_FILTER_LOCK | ATA_ACPI_FILTER_DIPM, }; enum ata_xfer_mask { ATA_MASK_PIO = ((1LU << ATA_NR_PIO_MODES) - 1) << ATA_SHIFT_PIO, ATA_MASK_MWDMA = ((1LU << ATA_NR_MWDMA_MODES) - 1) << ATA_SHIFT_MWDMA, ATA_MASK_UDMA = ((1LU << ATA_NR_UDMA_MODES) - 1) << ATA_SHIFT_UDMA, }; enum hsm_task_states { HSM_ST_IDLE, /* no command on going */ HSM_ST_FIRST, /* (waiting the device to) write CDB or first data block */ HSM_ST, /* (waiting the device to) transfer data */ HSM_ST_LAST, /* (waiting the device to) complete command */ HSM_ST_ERR, /* error */ }; enum ata_completion_errors { AC_ERR_OK = 0, /* no error */ AC_ERR_DEV = (1 << 0), /* device reported error */ AC_ERR_HSM = (1 << 1), /* host state machine violation */ AC_ERR_TIMEOUT = (1 << 2), /* timeout */ AC_ERR_MEDIA = (1 << 3), /* media error */ AC_ERR_ATA_BUS = (1 << 4), /* ATA bus error */ AC_ERR_HOST_BUS = (1 << 5), /* host bus error */ AC_ERR_SYSTEM = (1 << 6), /* system error */ AC_ERR_INVALID = (1 << 7), /* invalid argument */ AC_ERR_OTHER = (1 << 8), /* unknown */ AC_ERR_NODEV_HINT = (1 << 9), /* polling device detection hint */ AC_ERR_NCQ = (1 << 10), /* marker for offending NCQ qc */ }; /* * Link power management policy: If you alter this, you also need to * alter libata-scsi.c (for the ascii descriptions) */ enum ata_lpm_policy { ATA_LPM_UNKNOWN, ATA_LPM_MAX_POWER, ATA_LPM_MED_POWER, ATA_LPM_MED_POWER_WITH_DIPM, /* Med power + DIPM as win IRST does */ ATA_LPM_MIN_POWER_WITH_PARTIAL, /* Min Power + partial and slumber */ ATA_LPM_MIN_POWER, /* Min power + no partial (slumber only) */ }; enum ata_lpm_hints { ATA_LPM_EMPTY = (1 << 0), /* port empty/probing */ ATA_LPM_HIPM = (1 << 1), /* may use HIPM */ ATA_LPM_WAKE_ONLY = (1 << 2), /* only wake up link */ }; /* forward declarations */ struct scsi_device; struct ata_port_operations; struct ata_port; struct ata_link; struct ata_queued_cmd; /* typedefs */ typedef void (*ata_qc_cb_t) (struct ata_queued_cmd *qc); typedef int (*ata_prereset_fn_t)(struct ata_link *link, unsigned long deadline); typedef int (*ata_reset_fn_t)(struct ata_link *link, unsigned int *classes, unsigned long deadline); typedef void (*ata_postreset_fn_t)(struct ata_link *link, unsigned int *classes); extern struct device_attribute dev_attr_unload_heads; #ifdef CONFIG_SATA_HOST extern struct device_attribute dev_attr_link_power_management_policy; extern struct device_attribute dev_attr_ncq_prio_enable; extern struct device_attribute dev_attr_em_message_type; extern struct device_attribute dev_attr_em_message; extern struct device_attribute dev_attr_sw_activity; #endif enum sw_activity { OFF, BLINK_ON, BLINK_OFF, }; struct ata_taskfile { unsigned long flags; /* ATA_TFLAG_xxx */ u8 protocol; /* ATA_PROT_xxx */ u8 ctl; /* control reg */ u8 hob_feature; /* additional data */ u8 hob_nsect; /* to support LBA48 */ u8 hob_lbal; u8 hob_lbam; u8 hob_lbah; u8 feature; u8 nsect; u8 lbal; u8 lbam; u8 lbah; u8 device; u8 command; /* IO operation */ u32 auxiliary; /* auxiliary field */ /* from SATA 3.1 and */ /* ATA-8 ACS-3 */ }; #ifdef CONFIG_ATA_SFF struct ata_ioports { void __iomem *cmd_addr; void __iomem *data_addr; void __iomem *error_addr; void __iomem *feature_addr; void __iomem *nsect_addr; void __iomem *lbal_addr; void __iomem *lbam_addr; void __iomem *lbah_addr; void __iomem *device_addr; void __iomem *status_addr; void __iomem *command_addr; void __iomem *altstatus_addr; void __iomem *ctl_addr; #ifdef CONFIG_ATA_BMDMA void __iomem *bmdma_addr; #endif /* CONFIG_ATA_BMDMA */ void __iomem *scr_addr; }; #endif /* CONFIG_ATA_SFF */ struct ata_host { spinlock_t lock; struct device *dev; void __iomem * const *iomap; unsigned int n_ports; unsigned int n_tags; /* nr of NCQ tags */ void *private_data; struct ata_port_operations *ops; unsigned long flags; struct kref kref; struct mutex eh_mutex; struct task_struct *eh_owner; struct ata_port *simplex_claimed; /* channel owning the DMA */ struct ata_port *ports[]; }; struct ata_queued_cmd { struct ata_port *ap; struct ata_device *dev; struct scsi_cmnd *scsicmd; void (*scsidone)(struct scsi_cmnd *); struct ata_taskfile tf; u8 cdb[ATAPI_CDB_LEN]; unsigned long flags; /* ATA_QCFLAG_xxx */ unsigned int tag; /* libata core tag */ unsigned int hw_tag; /* driver tag */ unsigned int n_elem; unsigned int orig_n_elem; int dma_dir; unsigned int sect_size; unsigned int nbytes; unsigned int extrabytes; unsigned int curbytes; struct scatterlist sgent; struct scatterlist *sg; struct scatterlist *cursg; unsigned int cursg_ofs; unsigned int err_mask; struct ata_taskfile result_tf; ata_qc_cb_t complete_fn; void *private_data; void *lldd_task; }; struct ata_port_stats { unsigned long unhandled_irq; unsigned long idle_irq; unsigned long rw_reqbuf; }; struct ata_ering_entry { unsigned int eflags; unsigned int err_mask; u64 timestamp; }; struct ata_ering { int cursor; struct ata_ering_entry ring[ATA_ERING_SIZE]; }; struct ata_device { struct ata_link *link; unsigned int devno; /* 0 or 1 */ unsigned int horkage; /* List of broken features */ unsigned long flags; /* ATA_DFLAG_xxx */ struct scsi_device *sdev; /* attached SCSI device */ void *private_data; #ifdef CONFIG_ATA_ACPI union acpi_object *gtf_cache; unsigned int gtf_filter; #endif #ifdef CONFIG_SATA_ZPODD void *zpodd; #endif struct device tdev; /* n_sector is CLEAR_BEGIN, read comment above CLEAR_BEGIN */ u64 n_sectors; /* size of device, if ATA */ u64 n_native_sectors; /* native size, if ATA */ unsigned int class; /* ATA_DEV_xxx */ unsigned long unpark_deadline; u8 pio_mode; u8 dma_mode; u8 xfer_mode; unsigned int xfer_shift; /* ATA_SHIFT_xxx */ unsigned int multi_count; /* sectors count for READ/WRITE MULTIPLE */ unsigned int max_sectors; /* per-device max sectors */ unsigned int cdb_len; /* per-dev xfer mask */ unsigned long pio_mask; unsigned long mwdma_mask; unsigned long udma_mask; /* for CHS addressing */ u16 cylinders; /* Number of cylinders */ u16 heads; /* Number of heads */ u16 sectors; /* Number of sectors per track */ union { u16 id[ATA_ID_WORDS]; /* IDENTIFY xxx DEVICE data */ u32 gscr[SATA_PMP_GSCR_DWORDS]; /* PMP GSCR block */ } ____cacheline_aligned; /* DEVSLP Timing Variables from Identify Device Data Log */ u8 devslp_timing[ATA_LOG_DEVSLP_SIZE]; /* NCQ send and receive log subcommand support */ u8 ncq_send_recv_cmds[ATA_LOG_NCQ_SEND_RECV_SIZE]; u8 ncq_non_data_cmds[ATA_LOG_NCQ_NON_DATA_SIZE]; /* ZAC zone configuration */ u32 zac_zoned_cap; u32 zac_zones_optimal_open; u32 zac_zones_optimal_nonseq; u32 zac_zones_max_open; /* error history */ int spdn_cnt; /* ering is CLEAR_END, read comment above CLEAR_END */ struct ata_ering ering; }; /* Fields between ATA_DEVICE_CLEAR_BEGIN and ATA_DEVICE_CLEAR_END are * cleared to zero on ata_dev_init(). */ #define ATA_DEVICE_CLEAR_BEGIN offsetof(struct ata_device, n_sectors) #define ATA_DEVICE_CLEAR_END offsetof(struct ata_device, ering) struct ata_eh_info { struct ata_device *dev; /* offending device */ u32 serror; /* SError from LLDD */ unsigned int err_mask; /* port-wide err_mask */ unsigned int action; /* ATA_EH_* action mask */ unsigned int dev_action[ATA_MAX_DEVICES]; /* dev EH action */ unsigned int flags; /* ATA_EHI_* flags */ unsigned int probe_mask; char desc[ATA_EH_DESC_LEN]; int desc_len; }; struct ata_eh_context { struct ata_eh_info i; int tries[ATA_MAX_DEVICES]; int cmd_timeout_idx[ATA_MAX_DEVICES] [ATA_EH_CMD_TIMEOUT_TABLE_SIZE]; unsigned int classes[ATA_MAX_DEVICES]; unsigned int did_probe_mask; unsigned int unloaded_mask; unsigned int saved_ncq_enabled; u8 saved_xfer_mode[ATA_MAX_DEVICES]; /* timestamp for the last reset attempt or success */ unsigned long last_reset; }; struct ata_acpi_drive { u32 pio; u32 dma; } __packed; struct ata_acpi_gtm { struct ata_acpi_drive drive[2]; u32 flags; } __packed; struct ata_link { struct ata_port *ap; int pmp; /* port multiplier port # */ struct device tdev; unsigned int active_tag; /* active tag on this link */ u32 sactive; /* active NCQ commands */ unsigned int flags; /* ATA_LFLAG_xxx */ u32 saved_scontrol; /* SControl on probe */ unsigned int hw_sata_spd_limit; unsigned int sata_spd_limit; unsigned int sata_spd; /* current SATA PHY speed */ enum ata_lpm_policy lpm_policy; /* record runtime error info, protected by host_set lock */ struct ata_eh_info eh_info; /* EH context */ struct ata_eh_context eh_context; struct ata_device device[ATA_MAX_DEVICES]; unsigned long last_lpm_change; /* when last LPM change happened */ }; #define ATA_LINK_CLEAR_BEGIN offsetof(struct ata_link, active_tag) #define ATA_LINK_CLEAR_END offsetof(struct ata_link, device[0]) struct ata_port { struct Scsi_Host *scsi_host; /* our co-allocated scsi host */ struct ata_port_operations *ops; spinlock_t *lock; /* Flags owned by the EH context. Only EH should touch these once the port is active */ unsigned long flags; /* ATA_FLAG_xxx */ /* Flags that change dynamically, protected by ap->lock */ unsigned int pflags; /* ATA_PFLAG_xxx */ unsigned int print_id; /* user visible unique port ID */ unsigned int local_port_no; /* host local port num */ unsigned int port_no; /* 0 based port no. inside the host */ #ifdef CONFIG_ATA_SFF struct ata_ioports ioaddr; /* ATA cmd/ctl/dma register blocks */ u8 ctl; /* cache of ATA control register */ u8 last_ctl; /* Cache last written value */ struct ata_link* sff_pio_task_link; /* link currently used */ struct delayed_work sff_pio_task; #ifdef CONFIG_ATA_BMDMA struct ata_bmdma_prd *bmdma_prd; /* BMDMA SG list */ dma_addr_t bmdma_prd_dma; /* and its DMA mapping */ #endif /* CONFIG_ATA_BMDMA */ #endif /* CONFIG_ATA_SFF */ unsigned int pio_mask; unsigned int mwdma_mask; unsigned int udma_mask; unsigned int cbl; /* cable type; ATA_CBL_xxx */ struct ata_queued_cmd qcmd[ATA_MAX_QUEUE + 1]; unsigned long sas_tag_allocated; /* for sas tag allocation only */ u64 qc_active; int nr_active_links; /* #links with active qcs */ unsigned int sas_last_tag; /* track next tag hw expects */ struct ata_link link; /* host default link */ struct ata_link *slave_link; /* see ata_slave_link_init() */ int nr_pmp_links; /* nr of available PMP links */ struct ata_link *pmp_link; /* array of PMP links */ struct ata_link *excl_link; /* for PMP qc exclusion */ struct ata_port_stats stats; struct ata_host *host; struct device *dev; struct device tdev; struct mutex scsi_scan_mutex; struct delayed_work hotplug_task; struct work_struct scsi_rescan_task; unsigned int hsm_task_state; u32 msg_enable; struct list_head eh_done_q; wait_queue_head_t eh_wait_q; int eh_tries; struct completion park_req_pending; pm_message_t pm_mesg; enum ata_lpm_policy target_lpm_policy; struct timer_list fastdrain_timer; unsigned long fastdrain_cnt; async_cookie_t cookie; int em_message_type; void *private_data; #ifdef CONFIG_ATA_ACPI struct ata_acpi_gtm __acpi_init_gtm; /* use ata_acpi_init_gtm() */ #endif /* owned by EH */ u8 sector_buf[ATA_SECT_SIZE] ____cacheline_aligned; }; /* The following initializer overrides a method to NULL whether one of * its parent has the method defined or not. This is equivalent to * ERR_PTR(-ENOENT). Unfortunately, ERR_PTR doesn't render a constant * expression and thus can't be used as an initializer. */ #define ATA_OP_NULL (void *)(unsigned long)(-ENOENT) struct ata_port_operations { /* * Command execution */ int (*qc_defer)(struct ata_queued_cmd *qc); int (*check_atapi_dma)(struct ata_queued_cmd *qc); enum ata_completion_errors (*qc_prep)(struct ata_queued_cmd *qc); unsigned int (*qc_issue)(struct ata_queued_cmd *qc); bool (*qc_fill_rtf)(struct ata_queued_cmd *qc); /* * Configuration and exception handling */ int (*cable_detect)(struct ata_port *ap); unsigned long (*mode_filter)(struct ata_device *dev, unsigned long xfer_mask); void (*set_piomode)(struct ata_port *ap, struct ata_device *dev); void (*set_dmamode)(struct ata_port *ap, struct ata_device *dev); int (*set_mode)(struct ata_link *link, struct ata_device **r_failed_dev); unsigned int (*read_id)(struct ata_device *dev, struct ata_taskfile *tf, u16 *id); void (*dev_config)(struct ata_device *dev); void (*freeze)(struct ata_port *ap); void (*thaw)(struct ata_port *ap); ata_prereset_fn_t prereset; ata_reset_fn_t softreset; ata_reset_fn_t hardreset; ata_postreset_fn_t postreset; ata_prereset_fn_t pmp_prereset; ata_reset_fn_t pmp_softreset; ata_reset_fn_t pmp_hardreset; ata_postreset_fn_t pmp_postreset; void (*error_handler)(struct ata_port *ap); void (*lost_interrupt)(struct ata_port *ap); void (*post_internal_cmd)(struct ata_queued_cmd *qc); void (*sched_eh)(struct ata_port *ap); void (*end_eh)(struct ata_port *ap); /* * Optional features */ int (*scr_read)(struct ata_link *link, unsigned int sc_reg, u32 *val); int (*scr_write)(struct ata_link *link, unsigned int sc_reg, u32 val); void (*pmp_attach)(struct ata_port *ap); void (*pmp_detach)(struct ata_port *ap); int (*set_lpm)(struct ata_link *link, enum ata_lpm_policy policy, unsigned hints); /* * Start, stop, suspend and resume */ int (*port_suspend)(struct ata_port *ap, pm_message_t mesg); int (*port_resume)(struct ata_port *ap); int (*port_start)(struct ata_port *ap); void (*port_stop)(struct ata_port *ap); void (*host_stop)(struct ata_host *host); #ifdef CONFIG_ATA_SFF /* * SFF / taskfile oriented ops */ void (*sff_dev_select)(struct ata_port *ap, unsigned int device); void (*sff_set_devctl)(struct ata_port *ap, u8 ctl); u8 (*sff_check_status)(struct ata_port *ap); u8 (*sff_check_altstatus)(struct ata_port *ap); void (*sff_tf_load)(struct ata_port *ap, const struct ata_taskfile *tf); void (*sff_tf_read)(struct ata_port *ap, struct ata_taskfile *tf); void (*sff_exec_command)(struct ata_port *ap, const struct ata_taskfile *tf); unsigned int (*sff_data_xfer)(struct ata_queued_cmd *qc, unsigned char *buf, unsigned int buflen, int rw); void (*sff_irq_on)(struct ata_port *); bool (*sff_irq_check)(struct ata_port *); void (*sff_irq_clear)(struct ata_port *); void (*sff_drain_fifo)(struct ata_queued_cmd *qc); #ifdef CONFIG_ATA_BMDMA void (*bmdma_setup)(struct ata_queued_cmd *qc); void (*bmdma_start)(struct ata_queued_cmd *qc); void (*bmdma_stop)(struct ata_queued_cmd *qc); u8 (*bmdma_status)(struct ata_port *ap); #endif /* CONFIG_ATA_BMDMA */ #endif /* CONFIG_ATA_SFF */ ssize_t (*em_show)(struct ata_port *ap, char *buf); ssize_t (*em_store)(struct ata_port *ap, const char *message, size_t size); ssize_t (*sw_activity_show)(struct ata_device *dev, char *buf); ssize_t (*sw_activity_store)(struct ata_device *dev, enum sw_activity val); ssize_t (*transmit_led_message)(struct ata_port *ap, u32 state, ssize_t size); /* * Obsolete */ void (*phy_reset)(struct ata_port *ap); void (*eng_timeout)(struct ata_port *ap); /* * ->inherits must be the last field and all the preceding * fields must be pointers. */ const struct ata_port_operations *inherits; }; struct ata_port_info { unsigned long flags; unsigned long link_flags; unsigned long pio_mask; unsigned long mwdma_mask; unsigned long udma_mask; struct ata_port_operations *port_ops; void *private_data; }; struct ata_timing { unsigned short mode; /* ATA mode */ unsigned short setup; /* t1 */ unsigned short act8b; /* t2 for 8-bit I/O */ unsigned short rec8b; /* t2i for 8-bit I/O */ unsigned short cyc8b; /* t0 for 8-bit I/O */ unsigned short active; /* t2 or tD */ unsigned short recover; /* t2i or tK */ unsigned short dmack_hold; /* tj */ unsigned short cycle; /* t0 */ unsigned short udma; /* t2CYCTYP/2 */ }; /* * Core layer - drivers/ata/libata-core.c */ extern struct ata_port_operations ata_dummy_port_ops; extern const struct ata_port_info ata_dummy_port_info; static inline bool ata_is_atapi(u8 prot) { return prot & ATA_PROT_FLAG_ATAPI; } static inline bool ata_is_pio(u8 prot) { return prot & ATA_PROT_FLAG_PIO; } static inline bool ata_is_dma(u8 prot) { return prot & ATA_PROT_FLAG_DMA; } static inline bool ata_is_ncq(u8 prot) { return prot & ATA_PROT_FLAG_NCQ; } static inline bool ata_is_data(u8 prot) { return prot & (ATA_PROT_FLAG_PIO | ATA_PROT_FLAG_DMA); } static inline int is_multi_taskfile(struct ata_taskfile *tf) { return (tf->command == ATA_CMD_READ_MULTI) || (tf->command == ATA_CMD_WRITE_MULTI) || (tf->command == ATA_CMD_READ_MULTI_EXT) || (tf->command == ATA_CMD_WRITE_MULTI_EXT) || (tf->command == ATA_CMD_WRITE_MULTI_FUA_EXT); } static inline int ata_port_is_dummy(struct ata_port *ap) { return ap->ops == &ata_dummy_port_ops; } extern int ata_std_prereset(struct ata_link *link, unsigned long deadline); extern int ata_wait_after_reset(struct ata_link *link, unsigned long deadline, int (*check_ready)(struct ata_link *link)); extern int sata_std_hardreset(struct ata_link *link, unsigned int *class, unsigned long deadline); extern void ata_std_postreset(struct ata_link *link, unsigned int *classes); extern struct ata_host *ata_host_alloc(struct device *dev, int max_ports); extern struct ata_host *ata_host_alloc_pinfo(struct device *dev, const struct ata_port_info * const * ppi, int n_ports); extern void ata_host_get(struct ata_host *host); extern void ata_host_put(struct ata_host *host); extern int ata_host_start(struct ata_host *host); extern int ata_host_register(struct ata_host *host, struct scsi_host_template *sht); extern int ata_host_activate(struct ata_host *host, int irq, irq_handler_t irq_handler, unsigned long irq_flags, struct scsi_host_template *sht); extern void ata_host_detach(struct ata_host *host); extern void ata_host_init(struct ata_host *, struct device *, struct ata_port_operations *); extern int ata_scsi_detect(struct scsi_host_template *sht); extern int ata_scsi_ioctl(struct scsi_device *dev, unsigned int cmd, void __user *arg); #ifdef CONFIG_COMPAT #define ATA_SCSI_COMPAT_IOCTL .compat_ioctl = ata_scsi_ioctl, #else #define ATA_SCSI_COMPAT_IOCTL /* empty */ #endif extern int ata_scsi_queuecmd(struct Scsi_Host *h, struct scsi_cmnd *cmd); #if IS_REACHABLE(CONFIG_ATA) bool ata_scsi_dma_need_drain(struct request *rq); #else #define ata_scsi_dma_need_drain NULL #endif extern int ata_sas_scsi_ioctl(struct ata_port *ap, struct scsi_device *dev, unsigned int cmd, void __user *arg); extern bool ata_link_online(struct ata_link *link); extern bool ata_link_offline(struct ata_link *link); #ifdef CONFIG_PM extern int ata_host_suspend(struct ata_host *host, pm_message_t mesg); extern void ata_host_resume(struct ata_host *host); extern void ata_sas_port_suspend(struct ata_port *ap); extern void ata_sas_port_resume(struct ata_port *ap); #else static inline void ata_sas_port_suspend(struct ata_port *ap) { } static inline void ata_sas_port_resume(struct ata_port *ap) { } #endif extern int ata_ratelimit(void); extern void ata_msleep(struct ata_port *ap, unsigned int msecs); extern u32 ata_wait_register(struct ata_port *ap, void __iomem *reg, u32 mask, u32 val, unsigned long interval, unsigned long timeout); extern int atapi_cmd_type(u8 opcode); extern unsigned long ata_pack_xfermask(unsigned long pio_mask, unsigned long mwdma_mask, unsigned long udma_mask); extern void ata_unpack_xfermask(unsigned long xfer_mask, unsigned long *pio_mask, unsigned long *mwdma_mask, unsigned long *udma_mask); extern u8 ata_xfer_mask2mode(unsigned long xfer_mask); extern unsigned long ata_xfer_mode2mask(u8 xfer_mode); extern int ata_xfer_mode2shift(unsigned long xfer_mode); extern const char *ata_mode_string(unsigned long xfer_mask); extern unsigned long ata_id_xfermask(const u16 *id); extern int ata_std_qc_defer(struct ata_queued_cmd *qc); extern enum ata_completion_errors ata_noop_qc_prep(struct ata_queued_cmd *qc); extern void ata_sg_init(struct ata_queued_cmd *qc, struct scatterlist *sg, unsigned int n_elem); extern unsigned int ata_dev_classify(const struct ata_taskfile *tf); extern void ata_dev_disable(struct ata_device *adev); extern void ata_id_string(const u16 *id, unsigned char *s, unsigned int ofs, unsigned int len); extern void ata_id_c_string(const u16 *id, unsigned char *s, unsigned int ofs, unsigned int len); extern unsigned int ata_do_dev_read_id(struct ata_device *dev, struct ata_taskfile *tf, u16 *id); extern void ata_qc_complete(struct ata_queued_cmd *qc); extern u64 ata_qc_get_active(struct ata_port *ap); extern void ata_scsi_simulate(struct ata_device *dev, struct scsi_cmnd *cmd); extern int ata_std_bios_param(struct scsi_device *sdev, struct block_device *bdev, sector_t capacity, int geom[]); extern void ata_scsi_unlock_native_capacity(struct scsi_device *sdev); extern int ata_scsi_slave_config(struct scsi_device *sdev); extern void ata_scsi_slave_destroy(struct scsi_device *sdev); extern int ata_scsi_change_queue_depth(struct scsi_device *sdev, int queue_depth); extern int __ata_change_queue_depth(struct ata_port *ap, struct scsi_device *sdev, int queue_depth); extern struct ata_device *ata_dev_pair(struct ata_device *adev); extern int ata_do_set_mode(struct ata_link *link, struct ata_device **r_failed_dev); extern void ata_scsi_port_error_handler(struct Scsi_Host *host, struct ata_port *ap); extern void ata_scsi_cmd_error_handler(struct Scsi_Host *host, struct ata_port *ap, struct list_head *eh_q); /* * SATA specific code - drivers/ata/libata-sata.c */ #ifdef CONFIG_SATA_HOST extern const unsigned long sata_deb_timing_normal[]; extern const unsigned long sata_deb_timing_hotplug[]; extern const unsigned long sata_deb_timing_long[]; static inline const unsigned long * sata_ehc_deb_timing(struct ata_eh_context *ehc) { if (ehc->i.flags & ATA_EHI_HOTPLUGGED) return sata_deb_timing_hotplug; else return sata_deb_timing_normal; } extern int sata_scr_valid(struct ata_link *link); extern int sata_scr_read(struct ata_link *link, int reg, u32 *val); extern int sata_scr_write(struct ata_link *link, int reg, u32 val); extern int sata_scr_write_flush(struct ata_link *link, int reg, u32 val); extern int sata_set_spd(struct ata_link *link); extern int sata_link_hardreset(struct ata_link *link, const unsigned long *timing, unsigned long deadline, bool *online, int (*check_ready)(struct ata_link *)); extern int sata_link_resume(struct ata_link *link, const unsigned long *params, unsigned long deadline); extern void ata_eh_analyze_ncq_error(struct ata_link *link); #else static inline const unsigned long * sata_ehc_deb_timing(struct ata_eh_context *ehc) { return NULL; } static inline int sata_scr_valid(struct ata_link *link) { return 0; } static inline int sata_scr_read(struct ata_link *link, int reg, u32 *val) { return -EOPNOTSUPP; } static inline int sata_scr_write(struct ata_link *link, int reg, u32 val) { return -EOPNOTSUPP; } static inline int sata_scr_write_flush(struct ata_link *link, int reg, u32 val) { return -EOPNOTSUPP; } static inline int sata_set_spd(struct ata_link *link) { return -EOPNOTSUPP; } static inline int sata_link_hardreset(struct ata_link *link, const unsigned long *timing, unsigned long deadline, bool *online, int (*check_ready)(struct ata_link *)) { if (online) *online = false; return -EOPNOTSUPP; } static inline int sata_link_resume(struct ata_link *link, const unsigned long *params, unsigned long deadline) { return -EOPNOTSUPP; } static inline void ata_eh_analyze_ncq_error(struct ata_link *link) { } #endif extern int sata_link_debounce(struct ata_link *link, const unsigned long *params, unsigned long deadline); extern int sata_link_scr_lpm(struct ata_link *link, enum ata_lpm_policy policy, bool spm_wakeup); extern int ata_slave_link_init(struct ata_port *ap); extern void ata_sas_port_destroy(struct ata_port *); extern struct ata_port *ata_sas_port_alloc(struct ata_host *, struct ata_port_info *, struct Scsi_Host *); extern void ata_sas_async_probe(struct ata_port *ap); extern int ata_sas_sync_probe(struct ata_port *ap); extern int ata_sas_port_init(struct ata_port *); extern int ata_sas_port_start(struct ata_port *ap); extern int ata_sas_tport_add(struct device *parent, struct ata_port *ap); extern void ata_sas_tport_delete(struct ata_port *ap); extern void ata_sas_port_stop(struct ata_port *ap); extern int ata_sas_slave_configure(struct scsi_device *, struct ata_port *); extern int ata_sas_queuecmd(struct scsi_cmnd *cmd, struct ata_port *ap); extern void ata_tf_to_fis(const struct ata_taskfile *tf, u8 pmp, int is_cmd, u8 *fis); extern void ata_tf_from_fis(const u8 *fis, struct ata_taskfile *tf); extern int ata_qc_complete_multiple(struct ata_port *ap, u64 qc_active); extern bool sata_lpm_ignore_phy_events(struct ata_link *link); extern int sata_async_notification(struct ata_port *ap); extern int ata_cable_40wire(struct ata_port *ap); extern int ata_cable_80wire(struct ata_port *ap); extern int ata_cable_sata(struct ata_port *ap); extern int ata_cable_ignore(struct ata_port *ap); extern int ata_cable_unknown(struct ata_port *ap); /* Timing helpers */ extern unsigned int ata_pio_need_iordy(const struct ata_device *); extern u8 ata_timing_cycle2mode(unsigned int xfer_shift, int cycle); /* PCI */ #ifdef CONFIG_PCI struct pci_dev; struct pci_bits { unsigned int reg; /* PCI config register to read */ unsigned int width; /* 1 (8 bit), 2 (16 bit), 4 (32 bit) */ unsigned long mask; unsigned long val; }; extern int pci_test_config_bits(struct pci_dev *pdev, const struct pci_bits *bits); extern void ata_pci_shutdown_one(struct pci_dev *pdev); extern void ata_pci_remove_one(struct pci_dev *pdev); #ifdef CONFIG_PM extern void ata_pci_device_do_suspend(struct pci_dev *pdev, pm_message_t mesg); extern int __must_check ata_pci_device_do_resume(struct pci_dev *pdev); extern int ata_pci_device_suspend(struct pci_dev *pdev, pm_message_t mesg); extern int ata_pci_device_resume(struct pci_dev *pdev); #endif /* CONFIG_PM */ #endif /* CONFIG_PCI */ struct platform_device; extern int ata_platform_remove_one(struct platform_device *pdev); /* * ACPI - drivers/ata/libata-acpi.c */ #ifdef CONFIG_ATA_ACPI static inline const struct ata_acpi_gtm *ata_acpi_init_gtm(struct ata_port *ap) { if (ap->pflags & ATA_PFLAG_INIT_GTM_VALID) return &ap->__acpi_init_gtm; return NULL; } int ata_acpi_stm(struct ata_port *ap, const struct ata_acpi_gtm *stm); int ata_acpi_gtm(struct ata_port *ap, struct ata_acpi_gtm *stm); unsigned long ata_acpi_gtm_xfermask(struct ata_device *dev, const struct ata_acpi_gtm *gtm); int ata_acpi_cbl_80wire(struct ata_port *ap, const struct ata_acpi_gtm *gtm); #else static inline const struct ata_acpi_gtm *ata_acpi_init_gtm(struct ata_port *ap) { return NULL; } static inline int ata_acpi_stm(const struct ata_port *ap, struct ata_acpi_gtm *stm) { return -ENOSYS; } static inline int ata_acpi_gtm(const struct ata_port *ap, struct ata_acpi_gtm *stm) { return -ENOSYS; } static inline unsigned int ata_acpi_gtm_xfermask(struct ata_device *dev, const struct ata_acpi_gtm *gtm) { return 0; } static inline int ata_acpi_cbl_80wire(struct ata_port *ap, const struct ata_acpi_gtm *gtm) { return 0; } #endif /* * EH - drivers/ata/libata-eh.c */ extern void ata_port_schedule_eh(struct ata_port *ap); extern void ata_port_wait_eh(struct ata_port *ap); extern int ata_link_abort(struct ata_link *link); extern int ata_port_abort(struct ata_port *ap); extern int ata_port_freeze(struct ata_port *ap); extern void ata_eh_freeze_port(struct ata_port *ap); extern void ata_eh_thaw_port(struct ata_port *ap); extern void ata_eh_qc_complete(struct ata_queued_cmd *qc); extern void ata_eh_qc_retry(struct ata_queued_cmd *qc); extern void ata_do_eh(struct ata_port *ap, ata_prereset_fn_t prereset, ata_reset_fn_t softreset, ata_reset_fn_t hardreset, ata_postreset_fn_t postreset); extern void ata_std_error_handler(struct ata_port *ap); extern void ata_std_sched_eh(struct ata_port *ap); extern void ata_std_end_eh(struct ata_port *ap); extern int ata_link_nr_enabled(struct ata_link *link); /* * Base operations to inherit from and initializers for sht * * Operations * * base : Common to all libata drivers. * sata : SATA controllers w/ native interface. * pmp : SATA controllers w/ PMP support. * sff : SFF ATA controllers w/o BMDMA support. * bmdma : SFF ATA controllers w/ BMDMA support. * * sht initializers * * BASE : Common to all libata drivers. The user must set * sg_tablesize and dma_boundary. * PIO : SFF ATA controllers w/ only PIO support. * BMDMA : SFF ATA controllers w/ BMDMA support. sg_tablesize and * dma_boundary are set to BMDMA limits. * NCQ : SATA controllers supporting NCQ. The user must set * sg_tablesize, dma_boundary and can_queue. */ extern const struct ata_port_operations ata_base_port_ops; extern const struct ata_port_operations sata_port_ops; extern struct device_attribute *ata_common_sdev_attrs[]; /* * All sht initializers (BASE, PIO, BMDMA, NCQ) must be instantiated * by the edge drivers. Because the 'module' field of sht must be the * edge driver's module reference, otherwise the driver can be unloaded * even if the scsi_device is being accessed. */ #define __ATA_BASE_SHT(drv_name) \ .module = THIS_MODULE, \ .name = drv_name, \ .ioctl = ata_scsi_ioctl, \ ATA_SCSI_COMPAT_IOCTL \ .queuecommand = ata_scsi_queuecmd, \ .dma_need_drain = ata_scsi_dma_need_drain, \ .can_queue = ATA_DEF_QUEUE, \ .tag_alloc_policy = BLK_TAG_ALLOC_RR, \ .this_id = ATA_SHT_THIS_ID, \ .emulated = ATA_SHT_EMULATED, \ .proc_name = drv_name, \ .slave_configure = ata_scsi_slave_config, \ .slave_destroy = ata_scsi_slave_destroy, \ .bios_param = ata_std_bios_param, \ .unlock_native_capacity = ata_scsi_unlock_native_capacity #define ATA_BASE_SHT(drv_name) \ __ATA_BASE_SHT(drv_name), \ .sdev_attrs = ata_common_sdev_attrs #ifdef CONFIG_SATA_HOST extern struct device_attribute *ata_ncq_sdev_attrs[]; #define ATA_NCQ_SHT(drv_name) \ __ATA_BASE_SHT(drv_name), \ .sdev_attrs = ata_ncq_sdev_attrs, \ .change_queue_depth = ata_scsi_change_queue_depth #endif /* * PMP helpers */ #ifdef CONFIG_SATA_PMP static inline bool sata_pmp_supported(struct ata_port *ap) { return ap->flags & ATA_FLAG_PMP; } static inline bool sata_pmp_attached(struct ata_port *ap) { return ap->nr_pmp_links != 0; } static inline bool ata_is_host_link(const struct ata_link *link) { return link == &link->ap->link || link == link->ap->slave_link; } #else /* CONFIG_SATA_PMP */ static inline bool sata_pmp_supported(struct ata_port *ap) { return false; } static inline bool sata_pmp_attached(struct ata_port *ap) { return false; } static inline bool ata_is_host_link(const struct ata_link *link) { return 1; } #endif /* CONFIG_SATA_PMP */ static inline int sata_srst_pmp(struct ata_link *link) { if (sata_pmp_supported(link->ap) && ata_is_host_link(link)) return SATA_PMP_CTRL_PORT; return link->pmp; } /* * printk helpers */ __printf(3, 4) void ata_port_printk(const struct ata_port *ap, const char *level, const char *fmt, ...); __printf(3, 4) void ata_link_printk(const struct ata_link *link, const char *level, const char *fmt, ...); __printf(3, 4) void ata_dev_printk(const struct ata_device *dev, const char *level, const char *fmt, ...); #define ata_port_err(ap, fmt, ...) \ ata_port_printk(ap, KERN_ERR, fmt, ##__VA_ARGS__) #define ata_port_warn(ap, fmt, ...) \ ata_port_printk(ap, KERN_WARNING, fmt, ##__VA_ARGS__) #define ata_port_notice(ap, fmt, ...) \ ata_port_printk(ap, KERN_NOTICE, fmt, ##__VA_ARGS__) #define ata_port_info(ap, fmt, ...) \ ata_port_printk(ap, KERN_INFO, fmt, ##__VA_ARGS__) #define ata_port_dbg(ap, fmt, ...) \ ata_port_printk(ap, KERN_DEBUG, fmt, ##__VA_ARGS__) #define ata_link_err(link, fmt, ...) \ ata_link_printk(link, KERN_ERR, fmt, ##__VA_ARGS__) #define ata_link_warn(link, fmt, ...) \ ata_link_printk(link, KERN_WARNING, fmt, ##__VA_ARGS__) #define ata_link_notice(link, fmt, ...) \ ata_link_printk(link, KERN_NOTICE, fmt, ##__VA_ARGS__) #define ata_link_info(link, fmt, ...) \ ata_link_printk(link, KERN_INFO, fmt, ##__VA_ARGS__) #define ata_link_dbg(link, fmt, ...) \ ata_link_printk(link, KERN_DEBUG, fmt, ##__VA_ARGS__) #define ata_dev_err(dev, fmt, ...) \ ata_dev_printk(dev, KERN_ERR, fmt, ##__VA_ARGS__) #define ata_dev_warn(dev, fmt, ...) \ ata_dev_printk(dev, KERN_WARNING, fmt, ##__VA_ARGS__) #define ata_dev_notice(dev, fmt, ...) \ ata_dev_printk(dev, KERN_NOTICE, fmt, ##__VA_ARGS__) #define ata_dev_info(dev, fmt, ...) \ ata_dev_printk(dev, KERN_INFO, fmt, ##__VA_ARGS__) #define ata_dev_dbg(dev, fmt, ...) \ ata_dev_printk(dev, KERN_DEBUG, fmt, ##__VA_ARGS__) void ata_print_version(const struct device *dev, const char *version); /* * ata_eh_info helpers */ extern __printf(2, 3) void __ata_ehi_push_desc(struct ata_eh_info *ehi, const char *fmt, ...); extern __printf(2, 3) void ata_ehi_push_desc(struct ata_eh_info *ehi, const char *fmt, ...); extern void ata_ehi_clear_desc(struct ata_eh_info *ehi); static inline void ata_ehi_hotplugged(struct ata_eh_info *ehi) { ehi->probe_mask |= (1 << ATA_MAX_DEVICES) - 1; ehi->flags |= ATA_EHI_HOTPLUGGED; ehi->action |= ATA_EH_RESET | ATA_EH_ENABLE_LINK; ehi->err_mask |= AC_ERR_ATA_BUS; } /* * port description helpers */ extern __printf(2, 3) void ata_port_desc(struct ata_port *ap, const char *fmt, ...); #ifdef CONFIG_PCI extern void ata_port_pbar_desc(struct ata_port *ap, int bar, ssize_t offset, const char *name); #endif static inline bool ata_tag_internal(unsigned int tag) { return tag == ATA_TAG_INTERNAL; } static inline bool ata_tag_valid(unsigned int tag) { return tag < ATA_MAX_QUEUE || ata_tag_internal(tag); } #define __ata_qc_for_each(ap, qc, tag, max_tag, fn) \ for ((tag) = 0; (tag) < (max_tag) && \ ({ qc = fn((ap), (tag)); 1; }); (tag)++) \ /* * Internal use only, iterate commands ignoring error handling and * status of 'qc'. */ #define ata_qc_for_each_raw(ap, qc, tag) \ __ata_qc_for_each(ap, qc, tag, ATA_MAX_QUEUE, __ata_qc_from_tag) /* * Iterate all potential commands that can be queued */ #define ata_qc_for_each(ap, qc, tag) \ __ata_qc_for_each(ap, qc, tag, ATA_MAX_QUEUE, ata_qc_from_tag) /* * Like ata_qc_for_each, but with the internal tag included */ #define ata_qc_for_each_with_internal(ap, qc, tag) \ __ata_qc_for_each(ap, qc, tag, ATA_MAX_QUEUE + 1, ata_qc_from_tag) /* * device helpers */ static inline unsigned int ata_class_enabled(unsigned int class) { return class == ATA_DEV_ATA || class == ATA_DEV_ATAPI || class == ATA_DEV_PMP || class == ATA_DEV_SEMB || class == ATA_DEV_ZAC; } static inline unsigned int ata_class_disabled(unsigned int class) { return class == ATA_DEV_ATA_UNSUP || class == ATA_DEV_ATAPI_UNSUP || class == ATA_DEV_PMP_UNSUP || class == ATA_DEV_SEMB_UNSUP || class == ATA_DEV_ZAC_UNSUP; } static inline unsigned int ata_class_absent(unsigned int class) { return !ata_class_enabled(class) && !ata_class_disabled(class); } static inline unsigned int ata_dev_enabled(const struct ata_device *dev) { return ata_class_enabled(dev->class); } static inline unsigned int ata_dev_disabled(const struct ata_device *dev) { return ata_class_disabled(dev->class); } static inline unsigned int ata_dev_absent(const struct ata_device *dev) { return ata_class_absent(dev->class); } /* * link helpers */ static inline int ata_link_max_devices(const struct ata_link *link) { if (ata_is_host_link(link) && link->ap->flags & ATA_FLAG_SLAVE_POSS) return 2; return 1; } static inline int ata_link_active(struct ata_link *link) { return ata_tag_valid(link->active_tag) || link->sactive; } /* * Iterators * * ATA_LITER_* constants are used to select link iteration mode and * ATA_DITER_* device iteration mode. * * For a custom iteration directly using ata_{link|dev}_next(), if * @link or @dev, respectively, is NULL, the first element is * returned. @dev and @link can be any valid device or link and the * next element according to the iteration mode will be returned. * After the last element, NULL is returned. */ enum ata_link_iter_mode { ATA_LITER_EDGE, /* if present, PMP links only; otherwise, * host link. no slave link */ ATA_LITER_HOST_FIRST, /* host link followed by PMP or slave links */ ATA_LITER_PMP_FIRST, /* PMP links followed by host link, * slave link still comes after host link */ }; enum ata_dev_iter_mode { ATA_DITER_ENABLED, ATA_DITER_ENABLED_REVERSE, ATA_DITER_ALL, ATA_DITER_ALL_REVERSE, }; extern struct ata_link *ata_link_next(struct ata_link *link, struct ata_port *ap, enum ata_link_iter_mode mode); extern struct ata_device *ata_dev_next(struct ata_device *dev, struct ata_link *link, enum ata_dev_iter_mode mode); /* * Shortcut notation for iterations * * ata_for_each_link() iterates over each link of @ap according to * @mode. @link points to the current link in the loop. @link is * NULL after loop termination. ata_for_each_dev() works the same way * except that it iterates over each device of @link. * * Note that the mode prefixes ATA_{L|D}ITER_ shouldn't need to be * specified when using the following shorthand notations. Only the * mode itself (EDGE, HOST_FIRST, ENABLED, etc...) should be * specified. This not only increases brevity but also makes it * impossible to use ATA_LITER_* for device iteration or vice-versa. */ #define ata_for_each_link(link, ap, mode) \ for ((link) = ata_link_next(NULL, (ap), ATA_LITER_##mode); (link); \ (link) = ata_link_next((link), (ap), ATA_LITER_##mode)) #define ata_for_each_dev(dev, link, mode) \ for ((dev) = ata_dev_next(NULL, (link), ATA_DITER_##mode); (dev); \ (dev) = ata_dev_next((dev), (link), ATA_DITER_##mode)) /** * ata_ncq_enabled - Test whether NCQ is enabled * @dev: ATA device to test for * * LOCKING: * spin_lock_irqsave(host lock) * * RETURNS: * 1 if NCQ is enabled for @dev, 0 otherwise. */ static inline int ata_ncq_enabled(struct ata_device *dev) { if (!IS_ENABLED(CONFIG_SATA_HOST)) return 0; return (dev->flags & (ATA_DFLAG_PIO | ATA_DFLAG_NCQ_OFF | ATA_DFLAG_NCQ)) == ATA_DFLAG_NCQ; } static inline bool ata_fpdma_dsm_supported(struct ata_device *dev) { return (dev->flags & ATA_DFLAG_NCQ_SEND_RECV) && (dev->ncq_send_recv_cmds[ATA_LOG_NCQ_SEND_RECV_DSM_OFFSET] & ATA_LOG_NCQ_SEND_RECV_DSM_TRIM); } static inline bool ata_fpdma_read_log_supported(struct ata_device *dev) { return (dev->flags & ATA_DFLAG_NCQ_SEND_RECV) && (dev->ncq_send_recv_cmds[ATA_LOG_NCQ_SEND_RECV_RD_LOG_OFFSET] & ATA_LOG_NCQ_SEND_RECV_RD_LOG_SUPPORTED); } static inline bool ata_fpdma_zac_mgmt_in_supported(struct ata_device *dev) { return (dev->flags & ATA_DFLAG_NCQ_SEND_RECV) && (dev->ncq_send_recv_cmds[ATA_LOG_NCQ_SEND_RECV_ZAC_MGMT_OFFSET] & ATA_LOG_NCQ_SEND_RECV_ZAC_MGMT_IN_SUPPORTED); } static inline bool ata_fpdma_zac_mgmt_out_supported(struct ata_device *dev) { return (dev->ncq_non_data_cmds[ATA_LOG_NCQ_NON_DATA_ZAC_MGMT_OFFSET] & ATA_LOG_NCQ_NON_DATA_ZAC_MGMT_OUT); } static inline void ata_qc_set_polling(struct ata_queued_cmd *qc) { qc->tf.ctl |= ATA_NIEN; } static inline struct ata_queued_cmd *__ata_qc_from_tag(struct ata_port *ap, unsigned int tag) { if (ata_tag_valid(tag)) return &ap->qcmd[tag]; return NULL; } static inline struct ata_queued_cmd *ata_qc_from_tag(struct ata_port *ap, unsigned int tag) { struct ata_queued_cmd *qc = __ata_qc_from_tag(ap, tag); if (unlikely(!qc) || !ap->ops->error_handler) return qc; if ((qc->flags & (ATA_QCFLAG_ACTIVE | ATA_QCFLAG_FAILED)) == ATA_QCFLAG_ACTIVE) return qc; return NULL; } static inline unsigned int ata_qc_raw_nbytes(struct ata_queued_cmd *qc) { return qc->nbytes - min(qc->extrabytes, qc->nbytes); } static inline void ata_tf_init(struct ata_device *dev, struct ata_taskfile *tf) { memset(tf, 0, sizeof(*tf)); #ifdef CONFIG_ATA_SFF tf->ctl = dev->link->ap->ctl; #else tf->ctl = ATA_DEVCTL_OBS; #endif if (dev->devno == 0) tf->device = ATA_DEVICE_OBS; else tf->device = ATA_DEVICE_OBS | ATA_DEV1; } static inline void ata_qc_reinit(struct ata_queued_cmd *qc) { qc->dma_dir = DMA_NONE; qc->sg = NULL; qc->flags = 0; qc->cursg = NULL; qc->cursg_ofs = 0; qc->nbytes = qc->extrabytes = qc->curbytes = 0; qc->n_elem = 0; qc->err_mask = 0; qc->sect_size = ATA_SECT_SIZE; ata_tf_init(qc->dev, &qc->tf); /* init result_tf such that it indicates normal completion */ qc->result_tf.command = ATA_DRDY; qc->result_tf.feature = 0; } static inline int ata_try_flush_cache(const struct ata_device *dev) { return ata_id_wcache_enabled(dev->id) || ata_id_has_flush(dev->id) || ata_id_has_flush_ext(dev->id); } static inline unsigned int ac_err_mask(u8 status) { if (status & (ATA_BUSY | ATA_DRQ)) return AC_ERR_HSM; if (status & (ATA_ERR | ATA_DF)) return AC_ERR_DEV; return 0; } static inline unsigned int __ac_err_mask(u8 status) { unsigned int mask = ac_err_mask(status); if (mask == 0) return AC_ERR_OTHER; return mask; } static inline struct ata_port *ata_shost_to_port(struct Scsi_Host *host) { return *(struct ata_port **)&host->hostdata[0]; } static inline int ata_check_ready(u8 status) { if (!(status & ATA_BUSY)) return 1; /* 0xff indicates either no device or device not ready */ if (status == 0xff) return -ENODEV; return 0; } static inline unsigned long ata_deadline(unsigned long from_jiffies, unsigned long timeout_msecs) { return from_jiffies + msecs_to_jiffies(timeout_msecs); } /* Don't open code these in drivers as there are traps. Firstly the range may change in future hardware and specs, secondly 0xFF means 'no DMA' but is > UDMA_0. Dyma ddreigiau */ static inline int ata_using_mwdma(struct ata_device *adev) { if (adev->dma_mode >= XFER_MW_DMA_0 && adev->dma_mode <= XFER_MW_DMA_4) return 1; return 0; } static inline int ata_using_udma(struct ata_device *adev) { if (adev->dma_mode >= XFER_UDMA_0 && adev->dma_mode <= XFER_UDMA_7) return 1; return 0; } static inline int ata_dma_enabled(struct ata_device *adev) { return (adev->dma_mode == 0xFF ? 0 : 1); } /************************************************************************** * PATA timings - drivers/ata/libata-pata-timings.c */ extern const struct ata_timing *ata_timing_find_mode(u8 xfer_mode); extern int ata_timing_compute(struct ata_device *, unsigned short, struct ata_timing *, int, int); extern void ata_timing_merge(const struct ata_timing *, const struct ata_timing *, struct ata_timing *, unsigned int); /************************************************************************** * PMP - drivers/ata/libata-pmp.c */ #ifdef CONFIG_SATA_PMP extern const struct ata_port_operations sata_pmp_port_ops; extern int sata_pmp_qc_defer_cmd_switch(struct ata_queued_cmd *qc); extern void sata_pmp_error_handler(struct ata_port *ap); #else /* CONFIG_SATA_PMP */ #define sata_pmp_port_ops sata_port_ops #define sata_pmp_qc_defer_cmd_switch ata_std_qc_defer #define sata_pmp_error_handler ata_std_error_handler #endif /* CONFIG_SATA_PMP */ /************************************************************************** * SFF - drivers/ata/libata-sff.c */ #ifdef CONFIG_ATA_SFF extern const struct ata_port_operations ata_sff_port_ops; extern const struct ata_port_operations ata_bmdma32_port_ops; /* PIO only, sg_tablesize and dma_boundary limits can be removed */ #define ATA_PIO_SHT(drv_name) \ ATA_BASE_SHT(drv_name), \ .sg_tablesize = LIBATA_MAX_PRD, \ .dma_boundary = ATA_DMA_BOUNDARY extern void ata_sff_dev_select(struct ata_port *ap, unsigned int device); extern u8 ata_sff_check_status(struct ata_port *ap); extern void ata_sff_pause(struct ata_port *ap); extern void ata_sff_dma_pause(struct ata_port *ap); extern int ata_sff_busy_sleep(struct ata_port *ap, unsigned long timeout_pat, unsigned long timeout); extern int ata_sff_wait_ready(struct ata_link *link, unsigned long deadline); extern void ata_sff_tf_load(struct ata_port *ap, const struct ata_taskfile *tf); extern void ata_sff_tf_read(struct ata_port *ap, struct ata_taskfile *tf); extern void ata_sff_exec_command(struct ata_port *ap, const struct ata_taskfile *tf); extern unsigned int ata_sff_data_xfer(struct ata_queued_cmd *qc, unsigned char *buf, unsigned int buflen, int rw); extern unsigned int ata_sff_data_xfer32(struct ata_queued_cmd *qc, unsigned char *buf, unsigned int buflen, int rw); extern void ata_sff_irq_on(struct ata_port *ap); extern void ata_sff_irq_clear(struct ata_port *ap); extern int ata_sff_hsm_move(struct ata_port *ap, struct ata_queued_cmd *qc, u8 status, int in_wq); extern void ata_sff_queue_work(struct work_struct *work); extern void ata_sff_queue_delayed_work(struct delayed_work *dwork, unsigned long delay); extern void ata_sff_queue_pio_task(struct ata_link *link, unsigned long delay); extern unsigned int ata_sff_qc_issue(struct ata_queued_cmd *qc); extern bool ata_sff_qc_fill_rtf(struct ata_queued_cmd *qc); extern unsigned int ata_sff_port_intr(struct ata_port *ap, struct ata_queued_cmd *qc); extern irqreturn_t ata_sff_interrupt(int irq, void *dev_instance); extern void ata_sff_lost_interrupt(struct ata_port *ap); extern void ata_sff_freeze(struct ata_port *ap); extern void ata_sff_thaw(struct ata_port *ap); extern int ata_sff_prereset(struct ata_link *link, unsigned long deadline); extern unsigned int ata_sff_dev_classify(struct ata_device *dev, int present, u8 *r_err); extern int ata_sff_wait_after_reset(struct ata_link *link, unsigned int devmask, unsigned long deadline); extern int ata_sff_softreset(struct ata_link *link, unsigned int *classes, unsigned long deadline); extern int sata_sff_hardreset(struct ata_link *link, unsigned int *class, unsigned long deadline); extern void ata_sff_postreset(struct ata_link *link, unsigned int *classes); extern void ata_sff_drain_fifo(struct ata_queued_cmd *qc); extern void ata_sff_error_handler(struct ata_port *ap); extern void ata_sff_std_ports(struct ata_ioports *ioaddr); #ifdef CONFIG_PCI extern int ata_pci_sff_init_host(struct ata_host *host); extern int ata_pci_sff_prepare_host(struct pci_dev *pdev, const struct ata_port_info * const * ppi, struct ata_host **r_host); extern int ata_pci_sff_activate_host(struct ata_host *host, irq_handler_t irq_handler, struct scsi_host_template *sht); extern int ata_pci_sff_init_one(struct pci_dev *pdev, const struct ata_port_info * const * ppi, struct scsi_host_template *sht, void *host_priv, int hflags); #endif /* CONFIG_PCI */ #ifdef CONFIG_ATA_BMDMA extern const struct ata_port_operations ata_bmdma_port_ops; #define ATA_BMDMA_SHT(drv_name) \ ATA_BASE_SHT(drv_name), \ .sg_tablesize = LIBATA_MAX_PRD, \ .dma_boundary = ATA_DMA_BOUNDARY extern enum ata_completion_errors ata_bmdma_qc_prep(struct ata_queued_cmd *qc); extern unsigned int ata_bmdma_qc_issue(struct ata_queued_cmd *qc); extern enum ata_completion_errors ata_bmdma_dumb_qc_prep(struct ata_queued_cmd *qc); extern unsigned int ata_bmdma_port_intr(struct ata_port *ap, struct ata_queued_cmd *qc); extern irqreturn_t ata_bmdma_interrupt(int irq, void *dev_instance); extern void ata_bmdma_error_handler(struct ata_port *ap); extern void ata_bmdma_post_internal_cmd(struct ata_queued_cmd *qc); extern void ata_bmdma_irq_clear(struct ata_port *ap); extern void ata_bmdma_setup(struct ata_queued_cmd *qc); extern void ata_bmdma_start(struct ata_queued_cmd *qc); extern void ata_bmdma_stop(struct ata_queued_cmd *qc); extern u8 ata_bmdma_status(struct ata_port *ap); extern int ata_bmdma_port_start(struct ata_port *ap); extern int ata_bmdma_port_start32(struct ata_port *ap); #ifdef CONFIG_PCI extern int ata_pci_bmdma_clear_simplex(struct pci_dev *pdev); extern void ata_pci_bmdma_init(struct ata_host *host); extern int ata_pci_bmdma_prepare_host(struct pci_dev *pdev, const struct ata_port_info * const * ppi, struct ata_host **r_host); extern int ata_pci_bmdma_init_one(struct pci_dev *pdev, const struct ata_port_info * const * ppi, struct scsi_host_template *sht, void *host_priv, int hflags); #endif /* CONFIG_PCI */ #endif /* CONFIG_ATA_BMDMA */ /** * ata_sff_busy_wait - Wait for a port status register * @ap: Port to wait for. * @bits: bits that must be clear * @max: number of 10uS waits to perform * * Waits up to max*10 microseconds for the selected bits in the port's * status register to be cleared. * Returns final value of status register. * * LOCKING: * Inherited from caller. */ static inline u8 ata_sff_busy_wait(struct ata_port *ap, unsigned int bits, unsigned int max) { u8 status; do { udelay(10); status = ap->ops->sff_check_status(ap); max--; } while (status != 0xff && (status & bits) && (max > 0)); return status; } /** * ata_wait_idle - Wait for a port to be idle. * @ap: Port to wait for. * * Waits up to 10ms for port's BUSY and DRQ signals to clear. * Returns final value of status register. * * LOCKING: * Inherited from caller. */ static inline u8 ata_wait_idle(struct ata_port *ap) { u8 status = ata_sff_busy_wait(ap, ATA_BUSY | ATA_DRQ, 1000); #ifdef ATA_DEBUG if (status != 0xff && (status & (ATA_BUSY | ATA_DRQ))) ata_port_printk(ap, KERN_DEBUG, "abnormal Status 0x%X\n", status); #endif return status; } #endif /* CONFIG_ATA_SFF */ #endif /* __LINUX_LIBATA_H__ */
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 /* SPDX-License-Identifier: GPL-2.0 */ #ifndef __LINUX_NETLINK_H #define __LINUX_NETLINK_H #include <linux/capability.h> #include <linux/skbuff.h> #include <linux/export.h> #include <net/scm.h> #include <uapi/linux/netlink.h> struct net; static inline struct nlmsghdr *nlmsg_hdr(const struct sk_buff *skb) { return (struct nlmsghdr *)skb->data; } enum netlink_skb_flags { NETLINK_SKB_DST = 0x8, /* Dst set in sendto or sendmsg */ }; struct netlink_skb_parms { struct scm_creds creds; /* Skb credentials */ __u32 portid; __u32 dst_group; __u32 flags; struct sock *sk; bool nsid_is_set; int nsid; }; #define NETLINK_CB(skb) (*(struct netlink_skb_parms*)&((skb)->cb)) #define NETLINK_CREDS(skb) (&NETLINK_CB((skb)).creds) void netlink_table_grab(void); void netlink_table_ungrab(void); #define NL_CFG_F_NONROOT_RECV (1 << 0) #define NL_CFG_F_NONROOT_SEND (1 << 1) /* optional Netlink kernel configuration parameters */ struct netlink_kernel_cfg { unsigned int groups; unsigned int flags; void (*input)(struct sk_buff *skb); struct mutex *cb_mutex; int (*bind)(struct net *net, int group); void (*unbind)(struct net *net, int group); bool (*compare)(struct net *net, struct sock *sk); }; struct sock *__netlink_kernel_create(struct net *net, int unit, struct module *module, struct netlink_kernel_cfg *cfg); static inline struct sock * netlink_kernel_create(struct net *net, int unit, struct netlink_kernel_cfg *cfg) { return __netlink_kernel_create(net, unit, THIS_MODULE, cfg); } /* this can be increased when necessary - don't expose to userland */ #define NETLINK_MAX_COOKIE_LEN 20 /** * struct netlink_ext_ack - netlink extended ACK report struct * @_msg: message string to report - don't access directly, use * %NL_SET_ERR_MSG * @bad_attr: attribute with error * @policy: policy for a bad attribute * @cookie: cookie data to return to userspace (for success) * @cookie_len: actual cookie data length */ struct netlink_ext_ack { const char *_msg; const struct nlattr *bad_attr; const struct nla_policy *policy; u8 cookie[NETLINK_MAX_COOKIE_LEN]; u8 cookie_len; }; /* Always use this macro, this allows later putting the * message into a separate section or such for things * like translation or listing all possible messages. * Currently string formatting is not supported (due * to the lack of an output buffer.) */ #define NL_SET_ERR_MSG(extack, msg) do { \ static const char __msg[] = msg; \ struct netlink_ext_ack *__extack = (extack); \ \ if (__extack) \ __extack->_msg = __msg; \ } while (0) #define NL_SET_ERR_MSG_MOD(extack, msg) \ NL_SET_ERR_MSG((extack), KBUILD_MODNAME ": " msg) #define NL_SET_BAD_ATTR_POLICY(extack, attr, pol) do { \ if ((extack)) { \ (extack)->bad_attr = (attr); \ (extack)->policy = (pol); \ } \ } while (0) #define NL_SET_BAD_ATTR(extack, attr) NL_SET_BAD_ATTR_POLICY(extack, attr, NULL) #define NL_SET_ERR_MSG_ATTR_POL(extack, attr, pol, msg) do { \ static const char __msg[] = msg; \ struct netlink_ext_ack *__extack = (extack); \ \ if (__extack) { \ __extack->_msg = __msg; \ __extack->bad_attr = (attr); \ __extack->policy = (pol); \ } \ } while (0) #define NL_SET_ERR_MSG_ATTR(extack, attr, msg) \ NL_SET_ERR_MSG_ATTR_POL(extack, attr, NULL, msg) static inline void nl_set_extack_cookie_u64(struct netlink_ext_ack *extack, u64 cookie) { u64 __cookie = cookie; if (!extack) return; memcpy(extack->cookie, &__cookie, sizeof(__cookie)); extack->cookie_len = sizeof(__cookie); } static inline void nl_set_extack_cookie_u32(struct netlink_ext_ack *extack, u32 cookie) { u32 __cookie = cookie; if (!extack) return; memcpy(extack->cookie, &__cookie, sizeof(__cookie)); extack->cookie_len = sizeof(__cookie); } void netlink_kernel_release(struct sock *sk); int __netlink_change_ngroups(struct sock *sk, unsigned int groups); int netlink_change_ngroups(struct sock *sk, unsigned int groups); void __netlink_clear_multicast_users(struct sock *sk, unsigned int group); void netlink_ack(struct sk_buff *in_skb, struct nlmsghdr *nlh, int err, const struct netlink_ext_ack *extack); int netlink_has_listeners(struct sock *sk, unsigned int group); bool netlink_strict_get_check(struct sk_buff *skb); int netlink_unicast(struct sock *ssk, struct sk_buff *skb, __u32 portid, int nonblock); int netlink_broadcast(struct sock *ssk, struct sk_buff *skb, __u32 portid, __u32 group, gfp_t allocation); int netlink_broadcast_filtered(struct sock *ssk, struct sk_buff *skb, __u32 portid, __u32 group, gfp_t allocation, int (*filter)(struct sock *dsk, struct sk_buff *skb, void *data), void *filter_data); int netlink_set_err(struct sock *ssk, __u32 portid, __u32 group, int code); int netlink_register_notifier(struct notifier_block *nb); int netlink_unregister_notifier(struct notifier_block *nb); /* finegrained unicast helpers: */ struct sock *netlink_getsockbyfilp(struct file *filp); int netlink_attachskb(struct sock *sk, struct sk_buff *skb, long *timeo, struct sock *ssk); void netlink_detachskb(struct sock *sk, struct sk_buff *skb); int netlink_sendskb(struct sock *sk, struct sk_buff *skb); static inline struct sk_buff * netlink_skb_clone(struct sk_buff *skb, gfp_t gfp_mask) { struct sk_buff *nskb; nskb = skb_clone(skb, gfp_mask); if (!nskb) return NULL; /* This is a large skb, set destructor callback to release head */ if (is_vmalloc_addr(skb->head)) nskb->destructor = skb->destructor; return nskb; } /* * skb should fit one page. This choice is good for headerless malloc. * But we should limit to 8K so that userspace does not have to * use enormous buffer sizes on recvmsg() calls just to avoid * MSG_TRUNC when PAGE_SIZE is very large. */ #if PAGE_SIZE < 8192UL #define NLMSG_GOODSIZE SKB_WITH_OVERHEAD(PAGE_SIZE) #else #define NLMSG_GOODSIZE SKB_WITH_OVERHEAD(8192UL) #endif #define NLMSG_DEFAULT_SIZE (NLMSG_GOODSIZE - NLMSG_HDRLEN) struct netlink_callback { struct sk_buff *skb; const struct nlmsghdr *nlh; int (*dump)(struct sk_buff * skb, struct netlink_callback *cb); int (*done)(struct netlink_callback *cb); void *data; /* the module that dump function belong to */ struct module *module; struct netlink_ext_ack *extack; u16 family; u16 answer_flags; u32 min_dump_alloc; unsigned int prev_seq, seq; bool strict_check; union { u8 ctx[48]; /* args is deprecated. Cast a struct over ctx instead * for proper type safety. */ long args[6]; }; }; struct netlink_notify { struct net *net; u32 portid; int protocol; }; struct nlmsghdr * __nlmsg_put(struct sk_buff *skb, u32 portid, u32 seq, int type, int len, int flags); struct netlink_dump_control { int (*start)(struct netlink_callback *); int (*dump)(struct sk_buff *skb, struct netlink_callback *); int (*done)(struct netlink_callback *); void *data; struct module *module; u32 min_dump_alloc; }; int __netlink_dump_start(struct sock *ssk, struct sk_buff *skb, const struct nlmsghdr *nlh, struct netlink_dump_control *control); static inline int netlink_dump_start(struct sock *ssk, struct sk_buff *skb, const struct nlmsghdr *nlh, struct netlink_dump_control *control) { if (!control->module) control->module = THIS_MODULE; return __netlink_dump_start(ssk, skb, nlh, control); } struct netlink_tap { struct net_device *dev; struct module *module; struct list_head list; }; int netlink_add_tap(struct netlink_tap *nt); int netlink_remove_tap(struct netlink_tap *nt); bool __netlink_ns_capable(const struct netlink_skb_parms *nsp, struct user_namespace *ns, int cap); bool netlink_ns_capable(const struct sk_buff *skb, struct user_namespace *ns, int cap); bool netlink_capable(const struct sk_buff *skb, int cap); bool netlink_net_capable(const struct sk_buff *skb, int cap); #endif /* __LINUX_NETLINK_H */
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 /* SPDX-License-Identifier: GPL-2.0 */ #ifndef _NET_FLOW_DISSECTOR_H #define _NET_FLOW_DISSECTOR_H #include <linux/types.h> #include <linux/in6.h> #include <linux/siphash.h> #include <linux/string.h> #include <uapi/linux/if_ether.h> struct bpf_prog; struct net; struct sk_buff; /** * struct flow_dissector_key_control: * @thoff: Transport header offset */ struct flow_dissector_key_control { u16 thoff; u16 addr_type; u32 flags; }; #define FLOW_DIS_IS_FRAGMENT BIT(0) #define FLOW_DIS_FIRST_FRAG BIT(1) #define FLOW_DIS_ENCAPSULATION BIT(2) enum flow_dissect_ret { FLOW_DISSECT_RET_OUT_GOOD, FLOW_DISSECT_RET_OUT_BAD, FLOW_DISSECT_RET_PROTO_AGAIN, FLOW_DISSECT_RET_IPPROTO_AGAIN, FLOW_DISSECT_RET_CONTINUE, }; /** * struct flow_dissector_key_basic: * @n_proto: Network header protocol (eg. IPv4/IPv6) * @ip_proto: Transport header protocol (eg. TCP/UDP) */ struct flow_dissector_key_basic { __be16 n_proto; u8 ip_proto; u8 padding; }; struct flow_dissector_key_tags { u32 flow_label; }; struct flow_dissector_key_vlan { union { struct { u16 vlan_id:12, vlan_dei:1, vlan_priority:3; }; __be16 vlan_tci; }; __be16 vlan_tpid; }; struct flow_dissector_mpls_lse { u32 mpls_ttl:8, mpls_bos:1, mpls_tc:3, mpls_label:20; }; #define FLOW_DIS_MPLS_MAX 7 struct flow_dissector_key_mpls { struct flow_dissector_mpls_lse ls[FLOW_DIS_MPLS_MAX]; /* Label Stack */ u8 used_lses; /* One bit set for each Label Stack Entry in use */ }; static inline void dissector_set_mpls_lse(struct flow_dissector_key_mpls *mpls, int lse_index) { mpls->used_lses |= 1 << lse_index; } #define FLOW_DIS_TUN_OPTS_MAX 255 /** * struct flow_dissector_key_enc_opts: * @data: tunnel option data * @len: length of tunnel option data * @dst_opt_type: tunnel option type */ struct flow_dissector_key_enc_opts { u8 data[FLOW_DIS_TUN_OPTS_MAX]; /* Using IP_TUNNEL_OPTS_MAX is desired * here but seems difficult to #include */ u8 len; __be16 dst_opt_type; }; struct flow_dissector_key_keyid { __be32 keyid; }; /** * struct flow_dissector_key_ipv4_addrs: * @src: source ip address * @dst: destination ip address */ struct flow_dissector_key_ipv4_addrs { /* (src,dst) must be grouped, in the same way than in IP header */ __be32 src; __be32 dst; }; /** * struct flow_dissector_key_ipv6_addrs: * @src: source ip address * @dst: destination ip address */ struct flow_dissector_key_ipv6_addrs { /* (src,dst) must be grouped, in the same way than in IP header */ struct in6_addr src; struct in6_addr dst; }; /** * struct flow_dissector_key_tipc: * @key: source node address combined with selector */ struct flow_dissector_key_tipc { __be32 key; }; /** * struct flow_dissector_key_addrs: * @v4addrs: IPv4 addresses * @v6addrs: IPv6 addresses */ struct flow_dissector_key_addrs { union { struct flow_dissector_key_ipv4_addrs v4addrs; struct flow_dissector_key_ipv6_addrs v6addrs; struct flow_dissector_key_tipc tipckey; }; }; /** * flow_dissector_key_arp: * @ports: Operation, source and target addresses for an ARP header * for Ethernet hardware addresses and IPv4 protocol addresses * sip: Sender IP address * tip: Target IP address * op: Operation * sha: Sender hardware address * tpa: Target hardware address */ struct flow_dissector_key_arp { __u32 sip; __u32 tip; __u8 op; unsigned char sha[ETH_ALEN]; unsigned char tha[ETH_ALEN]; }; /** * flow_dissector_key_tp_ports: * @ports: port numbers of Transport header * src: source port number * dst: destination port number */ struct flow_dissector_key_ports { union { __be32 ports; struct { __be16 src; __be16 dst; }; }; }; /** * flow_dissector_key_icmp: * type: ICMP type * code: ICMP code * id: session identifier */ struct flow_dissector_key_icmp { struct { u8 type; u8 code; }; u16 id; }; /** * struct flow_dissector_key_eth_addrs: * @src: source Ethernet address * @dst: destination Ethernet address */ struct flow_dissector_key_eth_addrs { /* (dst,src) must be grouped, in the same way than in ETH header */ unsigned char dst[ETH_ALEN]; unsigned char src[ETH_ALEN]; }; /** * struct flow_dissector_key_tcp: * @flags: flags */ struct flow_dissector_key_tcp { __be16 flags; }; /** * struct flow_dissector_key_ip: * @tos: tos * @ttl: ttl */ struct flow_dissector_key_ip { __u8 tos; __u8 ttl; }; /** * struct flow_dissector_key_meta: * @ingress_ifindex: ingress ifindex * @ingress_iftype: ingress interface type */ struct flow_dissector_key_meta { int ingress_ifindex; u16 ingress_iftype; }; /** * struct flow_dissector_key_ct: * @ct_state: conntrack state after converting with map * @ct_mark: conttrack mark * @ct_zone: conntrack zone * @ct_labels: conntrack labels */ struct flow_dissector_key_ct { u16 ct_state; u16 ct_zone; u32 ct_mark; u32 ct_labels[4]; }; /** * struct flow_dissector_key_hash: * @hash: hash value */ struct flow_dissector_key_hash { u32 hash; }; enum flow_dissector_key_id { FLOW_DISSECTOR_KEY_CONTROL, /* struct flow_dissector_key_control */ FLOW_DISSECTOR_KEY_BASIC, /* struct flow_dissector_key_basic */ FLOW_DISSECTOR_KEY_IPV4_ADDRS, /* struct flow_dissector_key_ipv4_addrs */ FLOW_DISSECTOR_KEY_IPV6_ADDRS, /* struct flow_dissector_key_ipv6_addrs */ FLOW_DISSECTOR_KEY_PORTS, /* struct flow_dissector_key_ports */ FLOW_DISSECTOR_KEY_PORTS_RANGE, /* struct flow_dissector_key_ports */ FLOW_DISSECTOR_KEY_ICMP, /* struct flow_dissector_key_icmp */ FLOW_DISSECTOR_KEY_ETH_ADDRS, /* struct flow_dissector_key_eth_addrs */ FLOW_DISSECTOR_KEY_TIPC, /* struct flow_dissector_key_tipc */ FLOW_DISSECTOR_KEY_ARP, /* struct flow_dissector_key_arp */ FLOW_DISSECTOR_KEY_VLAN, /* struct flow_dissector_key_vlan */ FLOW_DISSECTOR_KEY_FLOW_LABEL, /* struct flow_dissector_key_tags */ FLOW_DISSECTOR_KEY_GRE_KEYID, /* struct flow_dissector_key_keyid */ FLOW_DISSECTOR_KEY_MPLS_ENTROPY, /* struct flow_dissector_key_keyid */ FLOW_DISSECTOR_KEY_ENC_KEYID, /* struct flow_dissector_key_keyid */ FLOW_DISSECTOR_KEY_ENC_IPV4_ADDRS, /* struct flow_dissector_key_ipv4_addrs */ FLOW_DISSECTOR_KEY_ENC_IPV6_ADDRS, /* struct flow_dissector_key_ipv6_addrs */ FLOW_DISSECTOR_KEY_ENC_CONTROL, /* struct flow_dissector_key_control */ FLOW_DISSECTOR_KEY_ENC_PORTS, /* struct flow_dissector_key_ports */ FLOW_DISSECTOR_KEY_MPLS, /* struct flow_dissector_key_mpls */ FLOW_DISSECTOR_KEY_TCP, /* struct flow_dissector_key_tcp */ FLOW_DISSECTOR_KEY_IP, /* struct flow_dissector_key_ip */ FLOW_DISSECTOR_KEY_CVLAN, /* struct flow_dissector_key_vlan */ FLOW_DISSECTOR_KEY_ENC_IP, /* struct flow_dissector_key_ip */ FLOW_DISSECTOR_KEY_ENC_OPTS, /* struct flow_dissector_key_enc_opts */ FLOW_DISSECTOR_KEY_META, /* struct flow_dissector_key_meta */ FLOW_DISSECTOR_KEY_CT, /* struct flow_dissector_key_ct */ FLOW_DISSECTOR_KEY_HASH, /* struct flow_dissector_key_hash */ FLOW_DISSECTOR_KEY_MAX, }; #define FLOW_DISSECTOR_F_PARSE_1ST_FRAG BIT(0) #define FLOW_DISSECTOR_F_STOP_AT_FLOW_LABEL BIT(1) #define FLOW_DISSECTOR_F_STOP_AT_ENCAP BIT(2) struct flow_dissector_key { enum flow_dissector_key_id key_id; size_t offset; /* offset of struct flow_dissector_key_* in target the struct */ }; struct flow_dissector { unsigned int used_keys; /* each bit repesents presence of one key id */ unsigned short int offset[FLOW_DISSECTOR_KEY_MAX]; }; struct flow_keys_basic { struct flow_dissector_key_control control; struct flow_dissector_key_basic basic; }; struct flow_keys { struct flow_dissector_key_control control; #define FLOW_KEYS_HASH_START_FIELD basic struct flow_dissector_key_basic basic __aligned(SIPHASH_ALIGNMENT); struct flow_dissector_key_tags tags; struct flow_dissector_key_vlan vlan; struct flow_dissector_key_vlan cvlan; struct flow_dissector_key_keyid keyid; struct flow_dissector_key_ports ports; struct flow_dissector_key_icmp icmp; /* 'addrs' must be the last member */ struct flow_dissector_key_addrs addrs; }; #define FLOW_KEYS_HASH_OFFSET \ offsetof(struct flow_keys, FLOW_KEYS_HASH_START_FIELD) __be32 flow_get_u32_src(const struct flow_keys *flow); __be32 flow_get_u32_dst(const struct flow_keys *flow); extern struct flow_dissector flow_keys_dissector; extern struct flow_dissector flow_keys_basic_dissector; /* struct flow_keys_digest: * * This structure is used to hold a digest of the full flow keys. This is a * larger "hash" of a flow to allow definitively matching specific flows where * the 32 bit skb->hash is not large enough. The size is limited to 16 bytes so * that it can be used in CB of skb (see sch_choke for an example). */ #define FLOW_KEYS_DIGEST_LEN 16 struct flow_keys_digest { u8 data[FLOW_KEYS_DIGEST_LEN]; }; void make_flow_keys_digest(struct flow_keys_digest *digest, const struct flow_keys *flow); static inline bool flow_keys_have_l4(const struct flow_keys *keys) { return (keys->ports.ports || keys->tags.flow_label); } u32 flow_hash_from_keys(struct flow_keys *keys); void skb_flow_get_icmp_tci(const struct sk_buff *skb, struct flow_dissector_key_icmp *key_icmp, void *data, int thoff, int hlen); static inline bool dissector_uses_key(const struct flow_dissector *flow_dissector, enum flow_dissector_key_id key_id) { return flow_dissector->used_keys & (1 << key_id); } static inline void *skb_flow_dissector_target(struct flow_dissector *flow_dissector, enum flow_dissector_key_id key_id, void *target_container) { return ((char *)target_container) + flow_dissector->offset[key_id]; } struct bpf_flow_dissector { struct bpf_flow_keys *flow_keys; const struct sk_buff *skb; void *data; void *data_end; }; static inline void flow_dissector_init_keys(struct flow_dissector_key_control *key_control, struct flow_dissector_key_basic *key_basic) { memset(key_control, 0, sizeof(*key_control)); memset(key_basic, 0, sizeof(*key_basic)); } #ifdef CONFIG_BPF_SYSCALL int flow_dissector_bpf_prog_attach_check(struct net *net, struct bpf_prog *prog); #endif /* CONFIG_BPF_SYSCALL */ #endif
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 /* SPDX-License-Identifier: GPL-2.0 */ #ifndef _NETFILTER_INGRESS_H_ #define _NETFILTER_INGRESS_H_ #include <linux/netfilter.h> #include <linux/netdevice.h> #ifdef CONFIG_NETFILTER_INGRESS static inline bool nf_hook_ingress_active(const struct sk_buff *skb) { #ifdef CONFIG_JUMP_LABEL if (!static_key_false(&nf_hooks_needed[NFPROTO_NETDEV][NF_NETDEV_INGRESS])) return false; #endif return rcu_access_pointer(skb->dev->nf_hooks_ingress); } /* caller must hold rcu_read_lock */ static inline int nf_hook_ingress(struct sk_buff *skb) { struct nf_hook_entries *e = rcu_dereference(skb->dev->nf_hooks_ingress); struct nf_hook_state state; int ret; /* Must recheck the ingress hook head, in the event it became NULL * after the check in nf_hook_ingress_active evaluated to true. */ if (unlikely(!e)) return 0; nf_hook_state_init(&state, NF_NETDEV_INGRESS, NFPROTO_NETDEV, skb->dev, NULL, NULL, dev_net(skb->dev), NULL); ret = nf_hook_slow(skb, &state, e, 0); if (ret == 0) return -1; return ret; } static inline void nf_hook_ingress_init(struct net_device *dev) { RCU_INIT_POINTER(dev->nf_hooks_ingress, NULL); } #else /* CONFIG_NETFILTER_INGRESS */ static inline int nf_hook_ingress_active(struct sk_buff *skb) { return 0; } static inline int nf_hook_ingress(struct sk_buff *skb) { return 0; } static inline void nf_hook_ingress_init(struct net_device *dev) {} #endif /* CONFIG_NETFILTER_INGRESS */ #endif /* _NETFILTER_INGRESS_H_ */
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 // SPDX-License-Identifier: GPL-2.0 /* * Copyright (c) 2003-2006, Cluster File Systems, Inc, info@clusterfs.com * Written by Alex Tomas <alex@clusterfs.com> */ #ifndef _EXT4_EXTENTS #define _EXT4_EXTENTS #include "ext4.h" /* * With AGGRESSIVE_TEST defined, the capacity of index/leaf blocks * becomes very small, so index split, in-depth growing and * other hard changes happen much more often. * This is for debug purposes only. */ #define AGGRESSIVE_TEST_ /* * With EXTENTS_STATS defined, the number of blocks and extents * are collected in the truncate path. They'll be shown at * umount time. */ #define EXTENTS_STATS__ /* * If CHECK_BINSEARCH is defined, then the results of the binary search * will also be checked by linear search. */ #define CHECK_BINSEARCH__ /* * If EXT_STATS is defined then stats numbers are collected. * These number will be displayed at umount time. */ #define EXT_STATS_ /* * ext4_inode has i_block array (60 bytes total). * The first 12 bytes store ext4_extent_header; * the remainder stores an array of ext4_extent. * For non-inode extent blocks, ext4_extent_tail * follows the array. */ /* * This is the extent tail on-disk structure. * All other extent structures are 12 bytes long. It turns out that * block_size % 12 >= 4 for at least all powers of 2 greater than 512, which * covers all valid ext4 block sizes. Therefore, this tail structure can be * crammed into the end of the block without having to rebalance the tree. */ struct ext4_extent_tail { __le32 et_checksum; /* crc32c(uuid+inum+extent_block) */ }; /* * This is the extent on-disk structure. * It's used at the bottom of the tree. */ struct ext4_extent { __le32 ee_block; /* first logical block extent covers */ __le16 ee_len; /* number of blocks covered by extent */ __le16 ee_start_hi; /* high 16 bits of physical block */ __le32 ee_start_lo; /* low 32 bits of physical block */ }; /* * This is index on-disk structure. * It's used at all the levels except the bottom. */ struct ext4_extent_idx { __le32 ei_block; /* index covers logical blocks from 'block' */ __le32 ei_leaf_lo; /* pointer to the physical block of the next * * level. leaf or next index could be there */ __le16 ei_leaf_hi; /* high 16 bits of physical block */ __u16 ei_unused; }; /* * Each block (leaves and indexes), even inode-stored has header. */ struct ext4_extent_header { __le16 eh_magic; /* probably will support different formats */ __le16 eh_entries; /* number of valid entries */ __le16 eh_max; /* capacity of store in entries */ __le16 eh_depth; /* has tree real underlying blocks? */ __le32 eh_generation; /* generation of the tree */ }; #define EXT4_EXT_MAGIC cpu_to_le16(0xf30a) #define EXT4_MAX_EXTENT_DEPTH 5 #define EXT4_EXTENT_TAIL_OFFSET(hdr) \ (sizeof(struct ext4_extent_header) + \ (sizeof(struct ext4_extent) * le16_to_cpu((hdr)->eh_max))) static inline struct ext4_extent_tail * find_ext4_extent_tail(struct ext4_extent_header *eh) { return (struct ext4_extent_tail *)(((void *)eh) + EXT4_EXTENT_TAIL_OFFSET(eh)); } /* * Array of ext4_ext_path contains path to some extent. * Creation/lookup routines use it for traversal/splitting/etc. * Truncate uses it to simulate recursive walking. */ struct ext4_ext_path { ext4_fsblk_t p_block; __u16 p_depth; __u16 p_maxdepth; struct ext4_extent *p_ext; struct ext4_extent_idx *p_idx; struct ext4_extent_header *p_hdr; struct buffer_head *p_bh; }; /* * Used to record a portion of a cluster found at the beginning or end * of an extent while traversing the extent tree during space removal. * A partial cluster may be removed if it does not contain blocks shared * with extents that aren't being deleted (tofree state). Otherwise, * it cannot be removed (nofree state). */ struct partial_cluster { ext4_fsblk_t pclu; /* physical cluster number */ ext4_lblk_t lblk; /* logical block number within logical cluster */ enum {initial, tofree, nofree} state; }; /* * structure for external API */ /* * EXT_INIT_MAX_LEN is the maximum number of blocks we can have in an * initialized extent. This is 2^15 and not (2^16 - 1), since we use the * MSB of ee_len field in the extent datastructure to signify if this * particular extent is an initialized extent or an unwritten (i.e. * preallocated). * EXT_UNWRITTEN_MAX_LEN is the maximum number of blocks we can have in an * unwritten extent. * If ee_len is <= 0x8000, it is an initialized extent. Otherwise, it is an * unwritten one. In other words, if MSB of ee_len is set, it is an * unwritten extent with only one special scenario when ee_len = 0x8000. * In this case we can not have an unwritten extent of zero length and * thus we make it as a special case of initialized extent with 0x8000 length. * This way we get better extent-to-group alignment for initialized extents. * Hence, the maximum number of blocks we can have in an *initialized* * extent is 2^15 (32768) and in an *unwritten* extent is 2^15-1 (32767). */ #define EXT_INIT_MAX_LEN (1UL << 15) #define EXT_UNWRITTEN_MAX_LEN (EXT_INIT_MAX_LEN - 1) #define EXT_FIRST_EXTENT(__hdr__) \ ((struct ext4_extent *) (((char *) (__hdr__)) + \ sizeof(struct ext4_extent_header))) #define EXT_FIRST_INDEX(__hdr__) \ ((struct ext4_extent_idx *) (((char *) (__hdr__)) + \ sizeof(struct ext4_extent_header))) #define EXT_HAS_FREE_INDEX(__path__) \ (le16_to_cpu((__path__)->p_hdr->eh_entries) \ < le16_to_cpu((__path__)->p_hdr->eh_max)) #define EXT_LAST_EXTENT(__hdr__) \ (EXT_FIRST_EXTENT((__hdr__)) + le16_to_cpu((__hdr__)->eh_entries) - 1) #define EXT_LAST_INDEX(__hdr__) \ (EXT_FIRST_INDEX((__hdr__)) + le16_to_cpu((__hdr__)->eh_entries) - 1) #define EXT_MAX_EXTENT(__hdr__) \ ((le16_to_cpu((__hdr__)->eh_max)) ? \ ((EXT_FIRST_EXTENT((__hdr__)) + le16_to_cpu((__hdr__)->eh_max) - 1)) \ : 0) #define EXT_MAX_INDEX(__hdr__) \ ((le16_to_cpu((__hdr__)->eh_max)) ? \ ((EXT_FIRST_INDEX((__hdr__)) + le16_to_cpu((__hdr__)->eh_max) - 1)) : 0) static inline struct ext4_extent_header *ext_inode_hdr(struct inode *inode) { return (struct ext4_extent_header *) EXT4_I(inode)->i_data; } static inline struct ext4_extent_header *ext_block_hdr(struct buffer_head *bh) { return (struct ext4_extent_header *) bh->b_data; } static inline unsigned short ext_depth(struct inode *inode) { return le16_to_cpu(ext_inode_hdr(inode)->eh_depth); } static inline void ext4_ext_mark_unwritten(struct ext4_extent *ext) { /* We can not have an unwritten extent of zero length! */ BUG_ON((le16_to_cpu(ext->ee_len) & ~EXT_INIT_MAX_LEN) == 0); ext->ee_len |= cpu_to_le16(EXT_INIT_MAX_LEN); } static inline int ext4_ext_is_unwritten(struct ext4_extent *ext) { /* Extent with ee_len of 0x8000 is treated as an initialized extent */ return (le16_to_cpu(ext->ee_len) > EXT_INIT_MAX_LEN); } static inline int ext4_ext_get_actual_len(struct ext4_extent *ext) { return (le16_to_cpu(ext->ee_len) <= EXT_INIT_MAX_LEN ? le16_to_cpu(ext->ee_len) : (le16_to_cpu(ext->ee_len) - EXT_INIT_MAX_LEN)); } static inline void ext4_ext_mark_initialized(struct ext4_extent *ext) { ext->ee_len = cpu_to_le16(ext4_ext_get_actual_len(ext)); } /* * ext4_ext_pblock: * combine low and high parts of physical block number into ext4_fsblk_t */ static inline ext4_fsblk_t ext4_ext_pblock(struct ext4_extent *ex) { ext4_fsblk_t block; block = le32_to_cpu(ex->ee_start_lo); block |= ((ext4_fsblk_t) le16_to_cpu(ex->ee_start_hi) << 31) << 1; return block; } /* * ext4_idx_pblock: * combine low and high parts of a leaf physical block number into ext4_fsblk_t */ static inline ext4_fsblk_t ext4_idx_pblock(struct ext4_extent_idx *ix) { ext4_fsblk_t block; block = le32_to_cpu(ix->ei_leaf_lo); block |= ((ext4_fsblk_t) le16_to_cpu(ix->ei_leaf_hi) << 31) << 1; return block; } /* * ext4_ext_store_pblock: * stores a large physical block number into an extent struct, * breaking it into parts */ static inline void ext4_ext_store_pblock(struct ext4_extent *ex, ext4_fsblk_t pb) { ex->ee_start_lo = cpu_to_le32((unsigned long) (pb & 0xffffffff)); ex->ee_start_hi = cpu_to_le16((unsigned long) ((pb >> 31) >> 1) & 0xffff); } /* * ext4_idx_store_pblock: * stores a large physical block number into an index struct, * breaking it into parts */ static inline void ext4_idx_store_pblock(struct ext4_extent_idx *ix, ext4_fsblk_t pb) { ix->ei_leaf_lo = cpu_to_le32((unsigned long) (pb & 0xffffffff)); ix->ei_leaf_hi = cpu_to_le16((unsigned long) ((pb >> 31) >> 1) & 0xffff); } #endif /* _EXT4_EXTENTS */
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 /* SPDX-License-Identifier: GPL-2.0 */ /* * fscrypt.h: declarations for per-file encryption * * Filesystems that implement per-file encryption must include this header * file. * * Copyright (C) 2015, Google, Inc. * * Written by Michael Halcrow, 2015. * Modified by Jaegeuk Kim, 2015. */ #ifndef _LINUX_FSCRYPT_H #define _LINUX_FSCRYPT_H #include <linux/fs.h> #include <linux/mm.h> #include <linux/slab.h> #include <uapi/linux/fscrypt.h> #define FS_CRYPTO_BLOCK_SIZE 16 union fscrypt_policy; struct fscrypt_info; struct seq_file; struct fscrypt_str { unsigned char *name; u32 len; }; struct fscrypt_name { const struct qstr *usr_fname; struct fscrypt_str disk_name; u32 hash; u32 minor_hash; struct fscrypt_str crypto_buf; bool is_nokey_name; }; #define FSTR_INIT(n, l) { .name = n, .len = l } #define FSTR_TO_QSTR(f) QSTR_INIT((f)->name, (f)->len) #define fname_name(p) ((p)->disk_name.name) #define fname_len(p) ((p)->disk_name.len) /* Maximum value for the third parameter of fscrypt_operations.set_context(). */ #define FSCRYPT_SET_CONTEXT_MAX_SIZE 40 #ifdef CONFIG_FS_ENCRYPTION /* * fscrypt superblock flags */ #define FS_CFLG_OWN_PAGES (1U << 1) /* * crypto operations for filesystems */ struct fscrypt_operations { unsigned int flags; const char *key_prefix; int (*get_context)(struct inode *inode, void *ctx, size_t len); int (*set_context)(struct inode *inode, const void *ctx, size_t len, void *fs_data); const union fscrypt_policy *(*get_dummy_policy)(struct super_block *sb); bool (*empty_dir)(struct inode *inode); unsigned int max_namelen; bool (*has_stable_inodes)(struct super_block *sb); void (*get_ino_and_lblk_bits)(struct super_block *sb, int *ino_bits_ret, int *lblk_bits_ret); int (*get_num_devices)(struct super_block *sb); void (*get_devices)(struct super_block *sb, struct request_queue **devs); }; static inline struct fscrypt_info *fscrypt_get_info(const struct inode *inode) { /* * Pairs with the cmpxchg_release() in fscrypt_get_encryption_info(). * I.e., another task may publish ->i_crypt_info concurrently, executing * a RELEASE barrier. We need to use smp_load_acquire() here to safely * ACQUIRE the memory the other task published. */ return smp_load_acquire(&inode->i_crypt_info); } /** * fscrypt_needs_contents_encryption() - check whether an inode needs * contents encryption * @inode: the inode to check * * Return: %true iff the inode is an encrypted regular file and the kernel was * built with fscrypt support. * * If you need to know whether the encrypt bit is set even when the kernel was * built without fscrypt support, you must use IS_ENCRYPTED() directly instead. */ static inline bool fscrypt_needs_contents_encryption(const struct inode *inode) { return IS_ENCRYPTED(inode) && S_ISREG(inode->i_mode); } /* * When d_splice_alias() moves a directory's no-key alias to its plaintext alias * as a result of the encryption key being added, DCACHE_NOKEY_NAME must be * cleared. Note that we don't have to support arbitrary moves of this flag * because fscrypt doesn't allow no-key names to be the source or target of a * rename(). */ static inline void fscrypt_handle_d_move(struct dentry *dentry) { dentry->d_flags &= ~DCACHE_NOKEY_NAME; } /** * fscrypt_is_nokey_name() - test whether a dentry is a no-key name * @dentry: the dentry to check * * This returns true if the dentry is a no-key dentry. A no-key dentry is a * dentry that was created in an encrypted directory that hasn't had its * encryption key added yet. Such dentries may be either positive or negative. * * When a filesystem is asked to create a new filename in an encrypted directory * and the new filename's dentry is a no-key dentry, it must fail the operation * with ENOKEY. This includes ->create(), ->mkdir(), ->mknod(), ->symlink(), * ->rename(), and ->link(). (However, ->rename() and ->link() are already * handled by fscrypt_prepare_rename() and fscrypt_prepare_link().) * * This is necessary because creating a filename requires the directory's * encryption key, but just checking for the key on the directory inode during * the final filesystem operation doesn't guarantee that the key was available * during the preceding dentry lookup. And the key must have already been * available during the dentry lookup in order for it to have been checked * whether the filename already exists in the directory and for the new file's * dentry not to be invalidated due to it incorrectly having the no-key flag. * * Return: %true if the dentry is a no-key name */ static inline bool fscrypt_is_nokey_name(const struct dentry *dentry) { return dentry->d_flags & DCACHE_NOKEY_NAME; } /* crypto.c */ void fscrypt_enqueue_decrypt_work(struct work_struct *); struct page *fscrypt_encrypt_pagecache_blocks(struct page *page, unsigned int len, unsigned int offs, gfp_t gfp_flags); int fscrypt_encrypt_block_inplace(const struct inode *inode, struct page *page, unsigned int len, unsigned int offs, u64 lblk_num, gfp_t gfp_flags); int fscrypt_decrypt_pagecache_blocks(struct page *page, unsigned int len, unsigned int offs); int fscrypt_decrypt_block_inplace(const struct inode *inode, struct page *page, unsigned int len, unsigned int offs, u64 lblk_num); static inline bool fscrypt_is_bounce_page(struct page *page) { return page->mapping == NULL; } static inline struct page *fscrypt_pagecache_page(struct page *bounce_page) { return (struct page *)page_private(bounce_page); } void fscrypt_free_bounce_page(struct page *bounce_page); /* policy.c */ int fscrypt_ioctl_set_policy(struct file *filp, const void __user *arg); int fscrypt_ioctl_get_policy(struct file *filp, void __user *arg); int fscrypt_ioctl_get_policy_ex(struct file *filp, void __user *arg); int fscrypt_ioctl_get_nonce(struct file *filp, void __user *arg); int fscrypt_has_permitted_context(struct inode *parent, struct inode *child); int fscrypt_set_context(struct inode *inode, void *fs_data); struct fscrypt_dummy_policy { const union fscrypt_policy *policy; }; int fscrypt_set_test_dummy_encryption(struct super_block *sb, const char *arg, struct fscrypt_dummy_policy *dummy_policy); void fscrypt_show_test_dummy_encryption(struct seq_file *seq, char sep, struct super_block *sb); static inline void fscrypt_free_dummy_policy(struct fscrypt_dummy_policy *dummy_policy) { kfree(dummy_policy->policy); dummy_policy->policy = NULL; } /* keyring.c */ void fscrypt_sb_free(struct super_block *sb); int fscrypt_ioctl_add_key(struct file *filp, void __user *arg); int fscrypt_ioctl_remove_key(struct file *filp, void __user *arg); int fscrypt_ioctl_remove_key_all_users(struct file *filp, void __user *arg); int fscrypt_ioctl_get_key_status(struct file *filp, void __user *arg); /* keysetup.c */ int fscrypt_get_encryption_info(struct inode *inode); int fscrypt_prepare_new_inode(struct inode *dir, struct inode *inode, bool *encrypt_ret); void fscrypt_put_encryption_info(struct inode *inode); void fscrypt_free_inode(struct inode *inode); int fscrypt_drop_inode(struct inode *inode); /* fname.c */ int fscrypt_setup_filename(struct inode *inode, const struct qstr *iname, int lookup, struct fscrypt_name *fname); static inline void fscrypt_free_filename(struct fscrypt_name *fname) { kfree(fname->crypto_buf.name); } int fscrypt_fname_alloc_buffer(u32 max_encrypted_len, struct fscrypt_str *crypto_str); void fscrypt_fname_free_buffer(struct fscrypt_str *crypto_str); int fscrypt_fname_disk_to_usr(const struct inode *inode, u32 hash, u32 minor_hash, const struct fscrypt_str *iname, struct fscrypt_str *oname); bool fscrypt_match_name(const struct fscrypt_name *fname, const u8 *de_name, u32 de_name_len); u64 fscrypt_fname_siphash(const struct inode *dir, const struct qstr *name); int fscrypt_d_revalidate(struct dentry *dentry, unsigned int flags); /* bio.c */ void fscrypt_decrypt_bio(struct bio *bio); int fscrypt_zeroout_range(const struct inode *inode, pgoff_t lblk, sector_t pblk, unsigned int len); /* hooks.c */ int fscrypt_file_open(struct inode *inode, struct file *filp); int __fscrypt_prepare_link(struct inode *inode, struct inode *dir, struct dentry *dentry); int __fscrypt_prepare_rename(struct inode *old_dir, struct dentry *old_dentry, struct inode *new_dir, struct dentry *new_dentry, unsigned int flags); int __fscrypt_prepare_lookup(struct inode *dir, struct dentry *dentry, struct fscrypt_name *fname); int fscrypt_prepare_setflags(struct inode *inode, unsigned int oldflags, unsigned int flags); int fscrypt_prepare_symlink(struct inode *dir, const char *target, unsigned int len, unsigned int max_len, struct fscrypt_str *disk_link); int __fscrypt_encrypt_symlink(struct inode *inode, const char *target, unsigned int len, struct fscrypt_str *disk_link); const char *fscrypt_get_symlink(struct inode *inode, const void *caddr, unsigned int max_size, struct delayed_call *done); int fscrypt_symlink_getattr(const struct path *path, struct kstat *stat); static inline void fscrypt_set_ops(struct super_block *sb, const struct fscrypt_operations *s_cop) { sb->s_cop = s_cop; } #else /* !CONFIG_FS_ENCRYPTION */ static inline struct fscrypt_info *fscrypt_get_info(const struct inode *inode) { return NULL; } static inline bool fscrypt_needs_contents_encryption(const struct inode *inode) { return false; } static inline void fscrypt_handle_d_move(struct dentry *dentry) { } static inline bool fscrypt_is_nokey_name(const struct dentry *dentry) { return false; } /* crypto.c */ static inline void fscrypt_enqueue_decrypt_work(struct work_struct *work) { } static inline struct page *fscrypt_encrypt_pagecache_blocks(struct page *page, unsigned int len, unsigned int offs, gfp_t gfp_flags) { return ERR_PTR(-EOPNOTSUPP); } static inline int fscrypt_encrypt_block_inplace(const struct inode *inode, struct page *page, unsigned int len, unsigned int offs, u64 lblk_num, gfp_t gfp_flags) { return -EOPNOTSUPP; } static inline int fscrypt_decrypt_pagecache_blocks(struct page *page, unsigned int len, unsigned int offs) { return -EOPNOTSUPP; } static inline int fscrypt_decrypt_block_inplace(const struct inode *inode, struct page *page, unsigned int len, unsigned int offs, u64 lblk_num) { return -EOPNOTSUPP; } static inline bool fscrypt_is_bounce_page(struct page *page) { return false; } static inline struct page *fscrypt_pagecache_page(struct page *bounce_page) { WARN_ON_ONCE(1); return ERR_PTR(-EINVAL); } static inline void fscrypt_free_bounce_page(struct page *bounce_page) { } /* policy.c */ static inline int fscrypt_ioctl_set_policy(struct file *filp, const void __user *arg) { return -EOPNOTSUPP; } static inline int fscrypt_ioctl_get_policy(struct file *filp, void __user *arg) { return -EOPNOTSUPP; } static inline int fscrypt_ioctl_get_policy_ex(struct file *filp, void __user *arg) { return -EOPNOTSUPP; } static inline int fscrypt_ioctl_get_nonce(struct file *filp, void __user *arg) { return -EOPNOTSUPP; } static inline int fscrypt_has_permitted_context(struct inode *parent, struct inode *child) { return 0; } static inline int fscrypt_set_context(struct inode *inode, void *fs_data) { return -EOPNOTSUPP; } struct fscrypt_dummy_policy { }; static inline void fscrypt_show_test_dummy_encryption(struct seq_file *seq, char sep, struct super_block *sb) { } static inline void fscrypt_free_dummy_policy(struct fscrypt_dummy_policy *dummy_policy) { } /* keyring.c */ static inline void fscrypt_sb_free(struct super_block *sb) { } static inline int fscrypt_ioctl_add_key(struct file *filp, void __user *arg) { return -EOPNOTSUPP; } static inline int fscrypt_ioctl_remove_key(struct file *filp, void __user *arg) { return -EOPNOTSUPP; } static inline int fscrypt_ioctl_remove_key_all_users(struct file *filp, void __user *arg) { return -EOPNOTSUPP; } static inline int fscrypt_ioctl_get_key_status(struct file *filp, void __user *arg) { return -EOPNOTSUPP; } /* keysetup.c */ static inline int fscrypt_get_encryption_info(struct inode *inode) { return -EOPNOTSUPP; } static inline int fscrypt_prepare_new_inode(struct inode *dir, struct inode *inode, bool *encrypt_ret) { if (IS_ENCRYPTED(dir)) return -EOPNOTSUPP; return 0; } static inline void fscrypt_put_encryption_info(struct inode *inode) { return; } static inline void fscrypt_free_inode(struct inode *inode) { } static inline int fscrypt_drop_inode(struct inode *inode) { return 0; } /* fname.c */ static inline int fscrypt_setup_filename(struct inode *dir, const struct qstr *iname, int lookup, struct fscrypt_name *fname) { if (IS_ENCRYPTED(dir)) return -EOPNOTSUPP; memset(fname, 0, sizeof(*fname)); fname->usr_fname = iname; fname->disk_name.name = (unsigned char *)iname->name; fname->disk_name.len = iname->len; return 0; } static inline void fscrypt_free_filename(struct fscrypt_name *fname) { return; } static inline int fscrypt_fname_alloc_buffer(u32 max_encrypted_len, struct fscrypt_str *crypto_str) { return -EOPNOTSUPP; } static inline void fscrypt_fname_free_buffer(struct fscrypt_str *crypto_str) { return; } static inline int fscrypt_fname_disk_to_usr(const struct inode *inode, u32 hash, u32 minor_hash, const struct fscrypt_str *iname, struct fscrypt_str *oname) { return -EOPNOTSUPP; } static inline bool fscrypt_match_name(const struct fscrypt_name *fname, const u8 *de_name, u32 de_name_len) { /* Encryption support disabled; use standard comparison */ if (de_name_len != fname->disk_name.len) return false; return !memcmp(de_name, fname->disk_name.name, fname->disk_name.len); } static inline u64 fscrypt_fname_siphash(const struct inode *dir, const struct qstr *name) { WARN_ON_ONCE(1); return 0; } static inline int fscrypt_d_revalidate(struct dentry *dentry, unsigned int flags) { return 1; } /* bio.c */ static inline void fscrypt_decrypt_bio(struct bio *bio) { } static inline int fscrypt_zeroout_range(const struct inode *inode, pgoff_t lblk, sector_t pblk, unsigned int len) { return -EOPNOTSUPP; } /* hooks.c */ static inline int fscrypt_file_open(struct inode *inode, struct file *filp) { if (IS_ENCRYPTED(inode)) return -EOPNOTSUPP; return 0; } static inline int __fscrypt_prepare_link(struct inode *inode, struct inode *dir, struct dentry *dentry) { return -EOPNOTSUPP; } static inline int __fscrypt_prepare_rename(struct inode *old_dir, struct dentry *old_dentry, struct inode *new_dir, struct dentry *new_dentry, unsigned int flags) { return -EOPNOTSUPP; } static inline int __fscrypt_prepare_lookup(struct inode *dir, struct dentry *dentry, struct fscrypt_name *fname) { return -EOPNOTSUPP; } static inline int fscrypt_prepare_setflags(struct inode *inode, unsigned int oldflags, unsigned int flags) { return 0; } static inline int fscrypt_prepare_symlink(struct inode *dir, const char *target, unsigned int len, unsigned int max_len, struct fscrypt_str *disk_link) { if (IS_ENCRYPTED(dir)) return -EOPNOTSUPP; disk_link->name = (unsigned char *)target; disk_link->len = len + 1; if (disk_link->len > max_len) return -ENAMETOOLONG; return 0; } static inline int __fscrypt_encrypt_symlink(struct inode *inode, const char *target, unsigned int len, struct fscrypt_str *disk_link) { return -EOPNOTSUPP; } static inline const char *fscrypt_get_symlink(struct inode *inode, const void *caddr, unsigned int max_size, struct delayed_call *done) { return ERR_PTR(-EOPNOTSUPP); } static inline int fscrypt_symlink_getattr(const struct path *path, struct kstat *stat) { return -EOPNOTSUPP; } static inline void fscrypt_set_ops(struct super_block *sb, const struct fscrypt_operations *s_cop) { } #endif /* !CONFIG_FS_ENCRYPTION */ /* inline_crypt.c */ #ifdef CONFIG_FS_ENCRYPTION_INLINE_CRYPT bool __fscrypt_inode_uses_inline_crypto(const struct inode *inode); void fscrypt_set_bio_crypt_ctx(struct bio *bio, const struct inode *inode, u64 first_lblk, gfp_t gfp_mask); void fscrypt_set_bio_crypt_ctx_bh(struct bio *bio, const struct buffer_head *first_bh, gfp_t gfp_mask); bool fscrypt_mergeable_bio(struct bio *bio, const struct inode *inode, u64 next_lblk); bool fscrypt_mergeable_bio_bh(struct bio *bio, const struct buffer_head *next_bh); #else /* CONFIG_FS_ENCRYPTION_INLINE_CRYPT */ static inline bool __fscrypt_inode_uses_inline_crypto(const struct inode *inode) { return false; } static inline void fscrypt_set_bio_crypt_ctx(struct bio *bio, const struct inode *inode, u64 first_lblk, gfp_t gfp_mask) { } static inline void fscrypt_set_bio_crypt_ctx_bh( struct bio *bio, const struct buffer_head *first_bh, gfp_t gfp_mask) { } static inline bool fscrypt_mergeable_bio(struct bio *bio, const struct inode *inode, u64 next_lblk) { return true; } static inline bool fscrypt_mergeable_bio_bh(struct bio *bio, const struct buffer_head *next_bh) { return true; } #endif /* !CONFIG_FS_ENCRYPTION_INLINE_CRYPT */ /** * fscrypt_inode_uses_inline_crypto() - test whether an inode uses inline * encryption * @inode: an inode. If encrypted, its key must be set up. * * Return: true if the inode requires file contents encryption and if the * encryption should be done in the block layer via blk-crypto rather * than in the filesystem layer. */ static inline bool fscrypt_inode_uses_inline_crypto(const struct inode *inode) { return fscrypt_needs_contents_encryption(inode) && __fscrypt_inode_uses_inline_crypto(inode); } /** * fscrypt_inode_uses_fs_layer_crypto() - test whether an inode uses fs-layer * encryption * @inode: an inode. If encrypted, its key must be set up. * * Return: true if the inode requires file contents encryption and if the * encryption should be done in the filesystem layer rather than in the * block layer via blk-crypto. */ static inline bool fscrypt_inode_uses_fs_layer_crypto(const struct inode *inode) { return fscrypt_needs_contents_encryption(inode) && !__fscrypt_inode_uses_inline_crypto(inode); } /** * fscrypt_has_encryption_key() - check whether an inode has had its key set up * @inode: the inode to check * * Return: %true if the inode has had its encryption key set up, else %false. * * Usually this should be preceded by fscrypt_get_encryption_info() to try to * set up the key first. */ static inline bool fscrypt_has_encryption_key(const struct inode *inode) { return fscrypt_get_info(inode) != NULL; } /** * fscrypt_require_key() - require an inode's encryption key * @inode: the inode we need the key for * * If the inode is encrypted, set up its encryption key if not already done. * Then require that the key be present and return -ENOKEY otherwise. * * No locks are needed, and the key will live as long as the struct inode --- so * it won't go away from under you. * * Return: 0 on success, -ENOKEY if the key is missing, or another -errno code * if a problem occurred while setting up the encryption key. */ static inline int fscrypt_require_key(struct inode *inode) { if (IS_ENCRYPTED(inode)) { int err = fscrypt_get_encryption_info(inode); if (err) return err; if (!fscrypt_has_encryption_key(inode)) return -ENOKEY; } return 0; } /** * fscrypt_prepare_link() - prepare to link an inode into a possibly-encrypted * directory * @old_dentry: an existing dentry for the inode being linked * @dir: the target directory * @dentry: negative dentry for the target filename * * A new link can only be added to an encrypted directory if the directory's * encryption key is available --- since otherwise we'd have no way to encrypt * the filename. Therefore, we first set up the directory's encryption key (if * not already done) and return an error if it's unavailable. * * We also verify that the link will not violate the constraint that all files * in an encrypted directory tree use the same encryption policy. * * Return: 0 on success, -ENOKEY if the directory's encryption key is missing, * -EXDEV if the link would result in an inconsistent encryption policy, or * another -errno code. */ static inline int fscrypt_prepare_link(struct dentry *old_dentry, struct inode *dir, struct dentry *dentry) { if (IS_ENCRYPTED(dir)) return __fscrypt_prepare_link(d_inode(old_dentry), dir, dentry); return 0; } /** * fscrypt_prepare_rename() - prepare for a rename between possibly-encrypted * directories * @old_dir: source directory * @old_dentry: dentry for source file * @new_dir: target directory * @new_dentry: dentry for target location (may be negative unless exchanging) * @flags: rename flags (we care at least about %RENAME_EXCHANGE) * * Prepare for ->rename() where the source and/or target directories may be * encrypted. A new link can only be added to an encrypted directory if the * directory's encryption key is available --- since otherwise we'd have no way * to encrypt the filename. A rename to an existing name, on the other hand, * *is* cryptographically possible without the key. However, we take the more * conservative approach and just forbid all no-key renames. * * We also verify that the rename will not violate the constraint that all files * in an encrypted directory tree use the same encryption policy. * * Return: 0 on success, -ENOKEY if an encryption key is missing, -EXDEV if the * rename would cause inconsistent encryption policies, or another -errno code. */ static inline int fscrypt_prepare_rename(struct inode *old_dir, struct dentry *old_dentry, struct inode *new_dir, struct dentry *new_dentry, unsigned int flags) { if (IS_ENCRYPTED(old_dir) || IS_ENCRYPTED(new_dir)) return __fscrypt_prepare_rename(old_dir, old_dentry, new_dir, new_dentry, flags); return 0; } /** * fscrypt_prepare_lookup() - prepare to lookup a name in a possibly-encrypted * directory * @dir: directory being searched * @dentry: filename being looked up * @fname: (output) the name to use to search the on-disk directory * * Prepare for ->lookup() in a directory which may be encrypted by determining * the name that will actually be used to search the directory on-disk. If the * directory's encryption key is available, then the lookup is assumed to be by * plaintext name; otherwise, it is assumed to be by no-key name. * * This also installs a custom ->d_revalidate() method which will invalidate the * dentry if it was created without the key and the key is later added. * * Return: 0 on success; -ENOENT if the directory's key is unavailable but the * filename isn't a valid no-key name, so a negative dentry should be created; * or another -errno code. */ static inline int fscrypt_prepare_lookup(struct inode *dir, struct dentry *dentry, struct fscrypt_name *fname) { if (IS_ENCRYPTED(dir)) return __fscrypt_prepare_lookup(dir, dentry, fname); memset(fname, 0, sizeof(*fname)); fname->usr_fname = &dentry->d_name; fname->disk_name.name = (unsigned char *)dentry->d_name.name; fname->disk_name.len = dentry->d_name.len; return 0; } /** * fscrypt_prepare_setattr() - prepare to change a possibly-encrypted inode's * attributes * @dentry: dentry through which the inode is being changed * @attr: attributes to change * * Prepare for ->setattr() on a possibly-encrypted inode. On an encrypted file, * most attribute changes are allowed even without the encryption key. However, * without the encryption key we do have to forbid truncates. This is needed * because the size being truncated to may not be a multiple of the filesystem * block size, and in that case we'd have to decrypt the final block, zero the * portion past i_size, and re-encrypt it. (We *could* allow truncating to a * filesystem block boundary, but it's simpler to just forbid all truncates --- * and we already forbid all other contents modifications without the key.) * * Return: 0 on success, -ENOKEY if the key is missing, or another -errno code * if a problem occurred while setting up the encryption key. */ static inline int fscrypt_prepare_setattr(struct dentry *dentry, struct iattr *attr) { if (attr->ia_valid & ATTR_SIZE) return fscrypt_require_key(d_inode(dentry)); return 0; } /** * fscrypt_encrypt_symlink() - encrypt the symlink target if needed * @inode: symlink inode * @target: plaintext symlink target * @len: length of @target excluding null terminator * @disk_link: (in/out) the on-disk symlink target being prepared * * If the symlink target needs to be encrypted, then this function encrypts it * into @disk_link->name. fscrypt_prepare_symlink() must have been called * previously to compute @disk_link->len. If the filesystem did not allocate a * buffer for @disk_link->name after calling fscrypt_prepare_link(), then one * will be kmalloc()'ed and the filesystem will be responsible for freeing it. * * Return: 0 on success, -errno on failure */ static inline int fscrypt_encrypt_symlink(struct inode *inode, const char *target, unsigned int len, struct fscrypt_str *disk_link) { if (IS_ENCRYPTED(inode)) return __fscrypt_encrypt_symlink(inode, target, len, disk_link); return 0; } /* If *pagep is a bounce page, free it and set *pagep to the pagecache page */ static inline void fscrypt_finalize_bounce_page(struct page **pagep) { struct page *page = *pagep; if (fscrypt_is_bounce_page(page)) { *pagep = fscrypt_pagecache_page(page); fscrypt_free_bounce_page(page); } } #endif /* _LINUX_FSCRYPT_H */
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 /* SPDX-License-Identifier: GPL-2.0 */ #undef TRACE_SYSTEM #define TRACE_SYSTEM vmscan #if !defined(_TRACE_VMSCAN_H) || defined(TRACE_HEADER_MULTI_READ) #define _TRACE_VMSCAN_H #include <linux/types.h> #include <linux/tracepoint.h> #include <linux/mm.h> #include <linux/memcontrol.h> #include <trace/events/mmflags.h> #define RECLAIM_WB_ANON 0x0001u #define RECLAIM_WB_FILE 0x0002u #define RECLAIM_WB_MIXED 0x0010u #define RECLAIM_WB_SYNC 0x0004u /* Unused, all reclaim async */ #define RECLAIM_WB_ASYNC 0x0008u #define RECLAIM_WB_LRU (RECLAIM_WB_ANON|RECLAIM_WB_FILE) #define show_reclaim_flags(flags) \ (flags) ? __print_flags(flags, "|", \ {RECLAIM_WB_ANON, "RECLAIM_WB_ANON"}, \ {RECLAIM_WB_FILE, "RECLAIM_WB_FILE"}, \ {RECLAIM_WB_MIXED, "RECLAIM_WB_MIXED"}, \ {RECLAIM_WB_SYNC, "RECLAIM_WB_SYNC"}, \ {RECLAIM_WB_ASYNC, "RECLAIM_WB_ASYNC"} \ ) : "RECLAIM_WB_NONE" #define trace_reclaim_flags(file) ( \ (file ? RECLAIM_WB_FILE : RECLAIM_WB_ANON) | \ (RECLAIM_WB_ASYNC) \ ) TRACE_EVENT(mm_vmscan_kswapd_sleep, TP_PROTO(int nid), TP_ARGS(nid), TP_STRUCT__entry( __field( int, nid ) ), TP_fast_assign( __entry->nid = nid; ), TP_printk("nid=%d", __entry->nid) ); TRACE_EVENT(mm_vmscan_kswapd_wake, TP_PROTO(int nid, int zid, int order), TP_ARGS(nid, zid, order), TP_STRUCT__entry( __field( int, nid ) __field( int, zid ) __field( int, order ) ), TP_fast_assign( __entry->nid = nid; __entry->zid = zid; __entry->order = order; ), TP_printk("nid=%d order=%d", __entry->nid, __entry->order) ); TRACE_EVENT(mm_vmscan_wakeup_kswapd, TP_PROTO(int nid, int zid, int order, gfp_t gfp_flags), TP_ARGS(nid, zid, order, gfp_flags), TP_STRUCT__entry( __field( int, nid ) __field( int, zid ) __field( int, order ) __field( gfp_t, gfp_flags ) ), TP_fast_assign( __entry->nid = nid; __entry->zid = zid; __entry->order = order; __entry->gfp_flags = gfp_flags; ), TP_printk("nid=%d order=%d gfp_flags=%s", __entry->nid, __entry->order, show_gfp_flags(__entry->gfp_flags)) ); DECLARE_EVENT_CLASS(mm_vmscan_direct_reclaim_begin_template, TP_PROTO(int order, gfp_t gfp_flags), TP_ARGS(order, gfp_flags), TP_STRUCT__entry( __field( int, order ) __field( gfp_t, gfp_flags ) ), TP_fast_assign( __entry->order = order; __entry->gfp_flags = gfp_flags; ), TP_printk("order=%d gfp_flags=%s", __entry->order, show_gfp_flags(__entry->gfp_flags)) ); DEFINE_EVENT(mm_vmscan_direct_reclaim_begin_template, mm_vmscan_direct_reclaim_begin, TP_PROTO(int order, gfp_t gfp_flags), TP_ARGS(order, gfp_flags) ); #ifdef CONFIG_MEMCG DEFINE_EVENT(mm_vmscan_direct_reclaim_begin_template, mm_vmscan_memcg_reclaim_begin, TP_PROTO(int order, gfp_t gfp_flags), TP_ARGS(order, gfp_flags) ); DEFINE_EVENT(mm_vmscan_direct_reclaim_begin_template, mm_vmscan_memcg_softlimit_reclaim_begin, TP_PROTO(int order, gfp_t gfp_flags), TP_ARGS(order, gfp_flags) ); #endif /* CONFIG_MEMCG */ DECLARE_EVENT_CLASS(mm_vmscan_direct_reclaim_end_template, TP_PROTO(unsigned long nr_reclaimed), TP_ARGS(nr_reclaimed), TP_STRUCT__entry( __field( unsigned long, nr_reclaimed ) ), TP_fast_assign( __entry->nr_reclaimed = nr_reclaimed; ), TP_printk("nr_reclaimed=%lu", __entry->nr_reclaimed) ); DEFINE_EVENT(mm_vmscan_direct_reclaim_end_template, mm_vmscan_direct_reclaim_end, TP_PROTO(unsigned long nr_reclaimed), TP_ARGS(nr_reclaimed) ); #ifdef CONFIG_MEMCG DEFINE_EVENT(mm_vmscan_direct_reclaim_end_template, mm_vmscan_memcg_reclaim_end, TP_PROTO(unsigned long nr_reclaimed), TP_ARGS(nr_reclaimed) ); DEFINE_EVENT(mm_vmscan_direct_reclaim_end_template, mm_vmscan_memcg_softlimit_reclaim_end, TP_PROTO(unsigned long nr_reclaimed), TP_ARGS(nr_reclaimed) ); #endif /* CONFIG_MEMCG */ TRACE_EVENT(mm_shrink_slab_start, TP_PROTO(struct shrinker *shr, struct shrink_control *sc, long nr_objects_to_shrink, unsigned long cache_items, unsigned long long delta, unsigned long total_scan, int priority), TP_ARGS(shr, sc, nr_objects_to_shrink, cache_items, delta, total_scan, priority), TP_STRUCT__entry( __field(struct shrinker *, shr) __field(void *, shrink) __field(int, nid) __field(long, nr_objects_to_shrink) __field(gfp_t, gfp_flags) __field(unsigned long, cache_items) __field(unsigned long long, delta) __field(unsigned long, total_scan) __field(int, priority) ), TP_fast_assign( __entry->shr = shr; __entry->shrink = shr->scan_objects; __entry->nid = sc->nid; __entry->nr_objects_to_shrink = nr_objects_to_shrink; __entry->gfp_flags = sc->gfp_mask; __entry->cache_items = cache_items; __entry->delta = delta; __entry->total_scan = total_scan; __entry->priority = priority; ), TP_printk("%pS %p: nid: %d objects to shrink %ld gfp_flags %s cache items %ld delta %lld total_scan %ld priority %d", __entry->shrink, __entry->shr, __entry->nid, __entry->nr_objects_to_shrink, show_gfp_flags(__entry->gfp_flags), __entry->cache_items, __entry->delta, __entry->total_scan, __entry->priority) ); TRACE_EVENT(mm_shrink_slab_end, TP_PROTO(struct shrinker *shr, int nid, int shrinker_retval, long unused_scan_cnt, long new_scan_cnt, long total_scan), TP_ARGS(shr, nid, shrinker_retval, unused_scan_cnt, new_scan_cnt, total_scan), TP_STRUCT__entry( __field(struct shrinker *, shr) __field(int, nid) __field(void *, shrink) __field(long, unused_scan) __field(long, new_scan) __field(int, retval) __field(long, total_scan) ), TP_fast_assign( __entry->shr = shr; __entry->nid = nid; __entry->shrink = shr->scan_objects; __entry->unused_scan = unused_scan_cnt; __entry->new_scan = new_scan_cnt; __entry->retval = shrinker_retval; __entry->total_scan = total_scan; ), TP_printk("%pS %p: nid: %d unused scan count %ld new scan count %ld total_scan %ld last shrinker return val %d", __entry->shrink, __entry->shr, __entry->nid, __entry->unused_scan, __entry->new_scan, __entry->total_scan, __entry->retval) ); TRACE_EVENT(mm_vmscan_lru_isolate, TP_PROTO(int highest_zoneidx, int order, unsigned long nr_requested, unsigned long nr_scanned, unsigned long nr_skipped, unsigned long nr_taken, isolate_mode_t isolate_mode, int lru), TP_ARGS(highest_zoneidx, order, nr_requested, nr_scanned, nr_skipped, nr_taken, isolate_mode, lru), TP_STRUCT__entry( __field(int, highest_zoneidx) __field(int, order) __field(unsigned long, nr_requested) __field(unsigned long, nr_scanned) __field(unsigned long, nr_skipped) __field(unsigned long, nr_taken) __field(isolate_mode_t, isolate_mode) __field(int, lru) ), TP_fast_assign( __entry->highest_zoneidx = highest_zoneidx; __entry->order = order; __entry->nr_requested = nr_requested; __entry->nr_scanned = nr_scanned; __entry->nr_skipped = nr_skipped; __entry->nr_taken = nr_taken; __entry->isolate_mode = isolate_mode; __entry->lru = lru; ), /* * classzone is previous name of the highest_zoneidx. * Reason not to change it is the ABI requirement of the tracepoint. */ TP_printk("isolate_mode=%d classzone=%d order=%d nr_requested=%lu nr_scanned=%lu nr_skipped=%lu nr_taken=%lu lru=%s", __entry->isolate_mode, __entry->highest_zoneidx, __entry->order, __entry->nr_requested, __entry->nr_scanned, __entry->nr_skipped, __entry->nr_taken, __print_symbolic(__entry->lru, LRU_NAMES)) ); TRACE_EVENT(mm_vmscan_writepage, TP_PROTO(struct page *page), TP_ARGS(page), TP_STRUCT__entry( __field(unsigned long, pfn) __field(int, reclaim_flags) ), TP_fast_assign( __entry->pfn = page_to_pfn(page); __entry->reclaim_flags = trace_reclaim_flags( page_is_file_lru(page)); ), TP_printk("page=%p pfn=%lu flags=%s", pfn_to_page(__entry->pfn), __entry->pfn, show_reclaim_flags(__entry->reclaim_flags)) ); TRACE_EVENT(mm_vmscan_lru_shrink_inactive, TP_PROTO(int nid, unsigned long nr_scanned, unsigned long nr_reclaimed, struct reclaim_stat *stat, int priority, int file), TP_ARGS(nid, nr_scanned, nr_reclaimed, stat, priority, file), TP_STRUCT__entry( __field(int, nid) __field(unsigned long, nr_scanned) __field(unsigned long, nr_reclaimed) __field(unsigned long, nr_dirty) __field(unsigned long, nr_writeback) __field(unsigned long, nr_congested) __field(unsigned long, nr_immediate) __field(unsigned int, nr_activate0) __field(unsigned int, nr_activate1) __field(unsigned long, nr_ref_keep) __field(unsigned long, nr_unmap_fail) __field(int, priority) __field(int, reclaim_flags) ), TP_fast_assign( __entry->nid = nid; __entry->nr_scanned = nr_scanned; __entry->nr_reclaimed = nr_reclaimed; __entry->nr_dirty = stat->nr_dirty; __entry->nr_writeback = stat->nr_writeback; __entry->nr_congested = stat->nr_congested; __entry->nr_immediate = stat->nr_immediate; __entry->nr_activate0 = stat->nr_activate[0]; __entry->nr_activate1 = stat->nr_activate[1]; __entry->nr_ref_keep = stat->nr_ref_keep; __entry->nr_unmap_fail = stat->nr_unmap_fail; __entry->priority = priority; __entry->reclaim_flags = trace_reclaim_flags(file); ), TP_printk("nid=%d nr_scanned=%ld nr_reclaimed=%ld nr_dirty=%ld nr_writeback=%ld nr_congested=%ld nr_immediate=%ld nr_activate_anon=%d nr_activate_file=%d nr_ref_keep=%ld nr_unmap_fail=%ld priority=%d flags=%s", __entry->nid, __entry->nr_scanned, __entry->nr_reclaimed, __entry->nr_dirty, __entry->nr_writeback, __entry->nr_congested, __entry->nr_immediate, __entry->nr_activate0, __entry->nr_activate1, __entry->nr_ref_keep, __entry->nr_unmap_fail, __entry->priority, show_reclaim_flags(__entry->reclaim_flags)) ); TRACE_EVENT(mm_vmscan_lru_shrink_active, TP_PROTO(int nid, unsigned long nr_taken, unsigned long nr_active, unsigned long nr_deactivated, unsigned long nr_referenced, int priority, int file), TP_ARGS(nid, nr_taken, nr_active, nr_deactivated, nr_referenced, priority, file), TP_STRUCT__entry( __field(int, nid) __field(unsigned long, nr_taken) __field(unsigned long, nr_active) __field(unsigned long, nr_deactivated) __field(unsigned long, nr_referenced) __field(int, priority) __field(int, reclaim_flags) ), TP_fast_assign( __entry->nid = nid; __entry->nr_taken = nr_taken; __entry->nr_active = nr_active; __entry->nr_deactivated = nr_deactivated; __entry->nr_referenced = nr_referenced; __entry->priority = priority; __entry->reclaim_flags = trace_reclaim_flags(file); ), TP_printk("nid=%d nr_taken=%ld nr_active=%ld nr_deactivated=%ld nr_referenced=%ld priority=%d flags=%s", __entry->nid, __entry->nr_taken, __entry->nr_active, __entry->nr_deactivated, __entry->nr_referenced, __entry->priority, show_reclaim_flags(__entry->reclaim_flags)) ); TRACE_EVENT(mm_vmscan_inactive_list_is_low, TP_PROTO(int nid, int reclaim_idx, unsigned long total_inactive, unsigned long inactive, unsigned long total_active, unsigned long active, unsigned long ratio, int file), TP_ARGS(nid, reclaim_idx, total_inactive, inactive, total_active, active, ratio, file), TP_STRUCT__entry( __field(int, nid) __field(int, reclaim_idx) __field(unsigned long, total_inactive) __field(unsigned long, inactive) __field(unsigned long, total_active) __field(unsigned long, active) __field(unsigned long, ratio) __field(int, reclaim_flags) ), TP_fast_assign( __entry->nid = nid; __entry->reclaim_idx = reclaim_idx; __entry->total_inactive = total_inactive; __entry->inactive = inactive; __entry->total_active = total_active; __entry->active = active; __entry->ratio = ratio; __entry->reclaim_flags = trace_reclaim_flags(file) & RECLAIM_WB_LRU; ), TP_printk("nid=%d reclaim_idx=%d total_inactive=%ld inactive=%ld total_active=%ld active=%ld ratio=%ld flags=%s", __entry->nid, __entry->reclaim_idx, __entry->total_inactive, __entry->inactive, __entry->total_active, __entry->active, __entry->ratio, show_reclaim_flags(__entry->reclaim_flags)) ); TRACE_EVENT(mm_vmscan_node_reclaim_begin, TP_PROTO(int nid, int order, gfp_t gfp_flags), TP_ARGS(nid, order, gfp_flags), TP_STRUCT__entry( __field(int, nid) __field(int, order) __field(gfp_t, gfp_flags) ), TP_fast_assign( __entry->nid = nid; __entry->order = order; __entry->gfp_flags = gfp_flags; ), TP_printk("nid=%d order=%d gfp_flags=%s", __entry->nid, __entry->order, show_gfp_flags(__entry->gfp_flags)) ); DEFINE_EVENT(mm_vmscan_direct_reclaim_end_template, mm_vmscan_node_reclaim_end, TP_PROTO(unsigned long nr_reclaimed), TP_ARGS(nr_reclaimed) ); #endif /* _TRACE_VMSCAN_H */ /* This part must be outside protection */ #include <trace/define_trace.h>
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 /* SPDX-License-Identifier: GPL-2.0-or-later */ /* * AEAD: Authenticated Encryption with Associated Data * * Copyright (c) 2007-2015 Herbert Xu <herbert@gondor.apana.org.au> */ #ifndef _CRYPTO_AEAD_H #define _CRYPTO_AEAD_H #include <linux/crypto.h> #include <linux/kernel.h> #include <linux/slab.h> /** * DOC: Authenticated Encryption With Associated Data (AEAD) Cipher API * * The AEAD cipher API is used with the ciphers of type CRYPTO_ALG_TYPE_AEAD * (listed as type "aead" in /proc/crypto) * * The most prominent examples for this type of encryption is GCM and CCM. * However, the kernel supports other types of AEAD ciphers which are defined * with the following cipher string: * * authenc(keyed message digest, block cipher) * * For example: authenc(hmac(sha256), cbc(aes)) * * The example code provided for the symmetric key cipher operation * applies here as well. Naturally all *skcipher* symbols must be exchanged * the *aead* pendants discussed in the following. In addition, for the AEAD * operation, the aead_request_set_ad function must be used to set the * pointer to the associated data memory location before performing the * encryption or decryption operation. In case of an encryption, the associated * data memory is filled during the encryption operation. For decryption, the * associated data memory must contain data that is used to verify the integrity * of the decrypted data. Another deviation from the asynchronous block cipher * operation is that the caller should explicitly check for -EBADMSG of the * crypto_aead_decrypt. That error indicates an authentication error, i.e. * a breach in the integrity of the message. In essence, that -EBADMSG error * code is the key bonus an AEAD cipher has over "standard" block chaining * modes. * * Memory Structure: * * The source scatterlist must contain the concatenation of * associated data || plaintext or ciphertext. * * The destination scatterlist has the same layout, except that the plaintext * (resp. ciphertext) will grow (resp. shrink) by the authentication tag size * during encryption (resp. decryption). * * In-place encryption/decryption is enabled by using the same scatterlist * pointer for both the source and destination. * * Even in the out-of-place case, space must be reserved in the destination for * the associated data, even though it won't be written to. This makes the * in-place and out-of-place cases more consistent. It is permissible for the * "destination" associated data to alias the "source" associated data. * * As with the other scatterlist crypto APIs, zero-length scatterlist elements * are not allowed in the used part of the scatterlist. Thus, if there is no * associated data, the first element must point to the plaintext/ciphertext. * * To meet the needs of IPsec, a special quirk applies to rfc4106, rfc4309, * rfc4543, and rfc7539esp ciphers. For these ciphers, the final 'ivsize' bytes * of the associated data buffer must contain a second copy of the IV. This is * in addition to the copy passed to aead_request_set_crypt(). These two IV * copies must not differ; different implementations of the same algorithm may * behave differently in that case. Note that the algorithm might not actually * treat the IV as associated data; nevertheless the length passed to * aead_request_set_ad() must include it. */ struct crypto_aead; /** * struct aead_request - AEAD request * @base: Common attributes for async crypto requests * @assoclen: Length in bytes of associated data for authentication * @cryptlen: Length of data to be encrypted or decrypted * @iv: Initialisation vector * @src: Source data * @dst: Destination data * @__ctx: Start of private context data */ struct aead_request { struct crypto_async_request base; unsigned int assoclen; unsigned int cryptlen; u8 *iv; struct scatterlist *src; struct scatterlist *dst; void *__ctx[] CRYPTO_MINALIGN_ATTR; }; /** * struct aead_alg - AEAD cipher definition * @maxauthsize: Set the maximum authentication tag size supported by the * transformation. A transformation may support smaller tag sizes. * As the authentication tag is a message digest to ensure the * integrity of the encrypted data, a consumer typically wants the * largest authentication tag possible as defined by this * variable. * @setauthsize: Set authentication size for the AEAD transformation. This * function is used to specify the consumer requested size of the * authentication tag to be either generated by the transformation * during encryption or the size of the authentication tag to be * supplied during the decryption operation. This function is also * responsible for checking the authentication tag size for * validity. * @setkey: see struct skcipher_alg * @encrypt: see struct skcipher_alg * @decrypt: see struct skcipher_alg * @ivsize: see struct skcipher_alg * @chunksize: see struct skcipher_alg * @init: Initialize the cryptographic transformation object. This function * is used to initialize the cryptographic transformation object. * This function is called only once at the instantiation time, right * after the transformation context was allocated. In case the * cryptographic hardware has some special requirements which need to * be handled by software, this function shall check for the precise * requirement of the transformation and put any software fallbacks * in place. * @exit: Deinitialize the cryptographic transformation object. This is a * counterpart to @init, used to remove various changes set in * @init. * @base: Definition of a generic crypto cipher algorithm. * * All fields except @ivsize is mandatory and must be filled. */ struct aead_alg { int (*setkey)(struct crypto_aead *tfm, const u8 *key, unsigned int keylen); int (*setauthsize)(struct crypto_aead *tfm, unsigned int authsize); int (*encrypt)(struct aead_request *req); int (*decrypt)(struct aead_request *req); int (*init)(struct crypto_aead *tfm); void (*exit)(struct crypto_aead *tfm); unsigned int ivsize; unsigned int maxauthsize; unsigned int chunksize; struct crypto_alg base; }; struct crypto_aead { unsigned int authsize; unsigned int reqsize; struct crypto_tfm base; }; static inline struct crypto_aead *__crypto_aead_cast(struct crypto_tfm *tfm) { return container_of(tfm, struct crypto_aead, base); } /** * crypto_alloc_aead() - allocate AEAD cipher handle * @alg_name: is the cra_name / name or cra_driver_name / driver name of the * AEAD cipher * @type: specifies the type of the cipher * @mask: specifies the mask for the cipher * * Allocate a cipher handle for an AEAD. The returned struct * crypto_aead is the cipher handle that is required for any subsequent * API invocation for that AEAD. * * Return: allocated cipher handle in case of success; IS_ERR() is true in case * of an error, PTR_ERR() returns the error code. */ struct crypto_aead *crypto_alloc_aead(const char *alg_name, u32 type, u32 mask); static inline struct crypto_tfm *crypto_aead_tfm(struct crypto_aead *tfm) { return &tfm->base; } /** * crypto_free_aead() - zeroize and free aead handle * @tfm: cipher handle to be freed * * If @tfm is a NULL or error pointer, this function does nothing. */ static inline void crypto_free_aead(struct crypto_aead *tfm) { crypto_destroy_tfm(tfm, crypto_aead_tfm(tfm)); } static inline struct aead_alg *crypto_aead_alg(struct crypto_aead *tfm) { return container_of(crypto_aead_tfm(tfm)->__crt_alg, struct aead_alg, base); } static inline unsigned int crypto_aead_alg_ivsize(struct aead_alg *alg) { return alg->ivsize; } /** * crypto_aead_ivsize() - obtain IV size * @tfm: cipher handle * * The size of the IV for the aead referenced by the cipher handle is * returned. This IV size may be zero if the cipher does not need an IV. * * Return: IV size in bytes */ static inline unsigned int crypto_aead_ivsize(struct crypto_aead *tfm) { return crypto_aead_alg_ivsize(crypto_aead_alg(tfm)); } /** * crypto_aead_authsize() - obtain maximum authentication data size * @tfm: cipher handle * * The maximum size of the authentication data for the AEAD cipher referenced * by the AEAD cipher handle is returned. The authentication data size may be * zero if the cipher implements a hard-coded maximum. * * The authentication data may also be known as "tag value". * * Return: authentication data size / tag size in bytes */ static inline unsigned int crypto_aead_authsize(struct crypto_aead *tfm) { return tfm->authsize; } static inline unsigned int crypto_aead_alg_maxauthsize(struct aead_alg *alg) { return alg->maxauthsize; } static inline unsigned int crypto_aead_maxauthsize(struct crypto_aead *aead) { return crypto_aead_alg_maxauthsize(crypto_aead_alg(aead)); } /** * crypto_aead_blocksize() - obtain block size of cipher * @tfm: cipher handle * * The block size for the AEAD referenced with the cipher handle is returned. * The caller may use that information to allocate appropriate memory for the * data returned by the encryption or decryption operation * * Return: block size of cipher */ static inline unsigned int crypto_aead_blocksize(struct crypto_aead *tfm) { return crypto_tfm_alg_blocksize(crypto_aead_tfm(tfm)); } static inline unsigned int crypto_aead_alignmask(struct crypto_aead *tfm) { return crypto_tfm_alg_alignmask(crypto_aead_tfm(tfm)); } static inline u32 crypto_aead_get_flags(struct crypto_aead *tfm) { return crypto_tfm_get_flags(crypto_aead_tfm(tfm)); } static inline void crypto_aead_set_flags(struct crypto_aead *tfm, u32 flags) { crypto_tfm_set_flags(crypto_aead_tfm(tfm), flags); } static inline void crypto_aead_clear_flags(struct crypto_aead *tfm, u32 flags) { crypto_tfm_clear_flags(crypto_aead_tfm(tfm), flags); } /** * crypto_aead_setkey() - set key for cipher * @tfm: cipher handle * @key: buffer holding the key * @keylen: length of the key in bytes * * The caller provided key is set for the AEAD referenced by the cipher * handle. * * Note, the key length determines the cipher type. Many block ciphers implement * different cipher modes depending on the key size, such as AES-128 vs AES-192 * vs. AES-256. When providing a 16 byte key for an AES cipher handle, AES-128 * is performed. * * Return: 0 if the setting of the key was successful; < 0 if an error occurred */ int crypto_aead_setkey(struct crypto_aead *tfm, const u8 *key, unsigned int keylen); /** * crypto_aead_setauthsize() - set authentication data size * @tfm: cipher handle * @authsize: size of the authentication data / tag in bytes * * Set the authentication data size / tag size. AEAD requires an authentication * tag (or MAC) in addition to the associated data. * * Return: 0 if the setting of the key was successful; < 0 if an error occurred */ int crypto_aead_setauthsize(struct crypto_aead *tfm, unsigned int authsize); static inline struct crypto_aead *crypto_aead_reqtfm(struct aead_request *req) { return __crypto_aead_cast(req->base.tfm); } /** * crypto_aead_encrypt() - encrypt plaintext * @req: reference to the aead_request handle that holds all information * needed to perform the cipher operation * * Encrypt plaintext data using the aead_request handle. That data structure * and how it is filled with data is discussed with the aead_request_* * functions. * * IMPORTANT NOTE The encryption operation creates the authentication data / * tag. That data is concatenated with the created ciphertext. * The ciphertext memory size is therefore the given number of * block cipher blocks + the size defined by the * crypto_aead_setauthsize invocation. The caller must ensure * that sufficient memory is available for the ciphertext and * the authentication tag. * * Return: 0 if the cipher operation was successful; < 0 if an error occurred */ int crypto_aead_encrypt(struct aead_request *req); /** * crypto_aead_decrypt() - decrypt ciphertext * @req: reference to the aead_request handle that holds all information * needed to perform the cipher operation * * Decrypt ciphertext data using the aead_request handle. That data structure * and how it is filled with data is discussed with the aead_request_* * functions. * * IMPORTANT NOTE The caller must concatenate the ciphertext followed by the * authentication data / tag. That authentication data / tag * must have the size defined by the crypto_aead_setauthsize * invocation. * * * Return: 0 if the cipher operation was successful; -EBADMSG: The AEAD * cipher operation performs the authentication of the data during the * decryption operation. Therefore, the function returns this error if * the authentication of the ciphertext was unsuccessful (i.e. the * integrity of the ciphertext or the associated data was violated); * < 0 if an error occurred. */ int crypto_aead_decrypt(struct aead_request *req); /** * DOC: Asynchronous AEAD Request Handle * * The aead_request data structure contains all pointers to data required for * the AEAD cipher operation. This includes the cipher handle (which can be * used by multiple aead_request instances), pointer to plaintext and * ciphertext, asynchronous callback function, etc. It acts as a handle to the * aead_request_* API calls in a similar way as AEAD handle to the * crypto_aead_* API calls. */ /** * crypto_aead_reqsize() - obtain size of the request data structure * @tfm: cipher handle * * Return: number of bytes */ static inline unsigned int crypto_aead_reqsize(struct crypto_aead *tfm) { return tfm->reqsize; } /** * aead_request_set_tfm() - update cipher handle reference in request * @req: request handle to be modified * @tfm: cipher handle that shall be added to the request handle * * Allow the caller to replace the existing aead handle in the request * data structure with a different one. */ static inline void aead_request_set_tfm(struct aead_request *req, struct crypto_aead *tfm) { req->base.tfm = crypto_aead_tfm(tfm); } /** * aead_request_alloc() - allocate request data structure * @tfm: cipher handle to be registered with the request * @gfp: memory allocation flag that is handed to kmalloc by the API call. * * Allocate the request data structure that must be used with the AEAD * encrypt and decrypt API calls. During the allocation, the provided aead * handle is registered in the request data structure. * * Return: allocated request handle in case of success, or NULL if out of memory */ static inline struct aead_request *aead_request_alloc(struct crypto_aead *tfm, gfp_t gfp) { struct aead_request *req; req = kmalloc(sizeof(*req) + crypto_aead_reqsize(tfm), gfp); if (likely(req)) aead_request_set_tfm(req, tfm); return req; } /** * aead_request_free() - zeroize and free request data structure * @req: request data structure cipher handle to be freed */ static inline void aead_request_free(struct aead_request *req) { kfree_sensitive(req); } /** * aead_request_set_callback() - set asynchronous callback function * @req: request handle * @flags: specify zero or an ORing of the flags * CRYPTO_TFM_REQ_MAY_BACKLOG the request queue may back log and * increase the wait queue beyond the initial maximum size; * CRYPTO_TFM_REQ_MAY_SLEEP the request processing may sleep * @compl: callback function pointer to be registered with the request handle * @data: The data pointer refers to memory that is not used by the kernel * crypto API, but provided to the callback function for it to use. Here, * the caller can provide a reference to memory the callback function can * operate on. As the callback function is invoked asynchronously to the * related functionality, it may need to access data structures of the * related functionality which can be referenced using this pointer. The * callback function can access the memory via the "data" field in the * crypto_async_request data structure provided to the callback function. * * Setting the callback function that is triggered once the cipher operation * completes * * The callback function is registered with the aead_request handle and * must comply with the following template:: * * void callback_function(struct crypto_async_request *req, int error) */ static inline void aead_request_set_callback(struct aead_request *req, u32 flags, crypto_completion_t compl, void *data) { req->base.complete = compl; req->base.data = data; req->base.flags = flags; } /** * aead_request_set_crypt - set data buffers * @req: request handle * @src: source scatter / gather list * @dst: destination scatter / gather list * @cryptlen: number of bytes to process from @src * @iv: IV for the cipher operation which must comply with the IV size defined * by crypto_aead_ivsize() * * Setting the source data and destination data scatter / gather lists which * hold the associated data concatenated with the plaintext or ciphertext. See * below for the authentication tag. * * For encryption, the source is treated as the plaintext and the * destination is the ciphertext. For a decryption operation, the use is * reversed - the source is the ciphertext and the destination is the plaintext. * * The memory structure for cipher operation has the following structure: * * - AEAD encryption input: assoc data || plaintext * - AEAD encryption output: assoc data || cipherntext || auth tag * - AEAD decryption input: assoc data || ciphertext || auth tag * - AEAD decryption output: assoc data || plaintext * * Albeit the kernel requires the presence of the AAD buffer, however, * the kernel does not fill the AAD buffer in the output case. If the * caller wants to have that data buffer filled, the caller must either * use an in-place cipher operation (i.e. same memory location for * input/output memory location). */ static inline void aead_request_set_crypt(struct aead_request *req, struct scatterlist *src, struct scatterlist *dst, unsigned int cryptlen, u8 *iv) { req->src = src; req->dst = dst; req->cryptlen = cryptlen; req->iv = iv; } /** * aead_request_set_ad - set associated data information * @req: request handle * @assoclen: number of bytes in associated data * * Setting the AD information. This function sets the length of * the associated data. */ static inline void aead_request_set_ad(struct aead_request *req, unsigned int assoclen) { req->assoclen = assoclen; } #endif /* _CRYPTO_AEAD_H */
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 #ifndef __NET_SCHED_CODEL_IMPL_H #define __NET_SCHED_CODEL_IMPL_H /* * Codel - The Controlled-Delay Active Queue Management algorithm * * Copyright (C) 2011-2012 Kathleen Nichols <nichols@pollere.com> * Copyright (C) 2011-2012 Van Jacobson <van@pollere.net> * Copyright (C) 2012 Michael D. Taht <dave.taht@bufferbloat.net> * Copyright (C) 2012,2015 Eric Dumazet <edumazet@google.com> * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions, and the following disclaimer, * without modification. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. The names of the authors may not be used to endorse or promote products * derived from this software without specific prior written permission. * * Alternatively, provided that this notice is retained in full, this * software may be distributed under the terms of the GNU General * Public License ("GPL") version 2, in which case the provisions of the * GPL apply INSTEAD OF those given above. * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH * DAMAGE. * */ /* Controlling Queue Delay (CoDel) algorithm * ========================================= * Source : Kathleen Nichols and Van Jacobson * http://queue.acm.org/detail.cfm?id=2209336 * * Implemented on linux by Dave Taht and Eric Dumazet */ static void codel_params_init(struct codel_params *params) { params->interval = MS2TIME(100); params->target = MS2TIME(5); params->ce_threshold = CODEL_DISABLED_THRESHOLD; params->ecn = false; } static void codel_vars_init(struct codel_vars *vars) { memset(vars, 0, sizeof(*vars)); } static void codel_stats_init(struct codel_stats *stats) { stats->maxpacket = 0; } /* * http://en.wikipedia.org/wiki/Methods_of_computing_square_roots#Iterative_methods_for_reciprocal_square_roots * new_invsqrt = (invsqrt / 2) * (3 - count * invsqrt^2) * * Here, invsqrt is a fixed point number (< 1.0), 32bit mantissa, aka Q0.32 */ static void codel_Newton_step(struct codel_vars *vars) { u32 invsqrt = ((u32)vars->rec_inv_sqrt) << REC_INV_SQRT_SHIFT; u32 invsqrt2 = ((u64)invsqrt * invsqrt) >> 32; u64 val = (3LL << 32) - ((u64)vars->count * invsqrt2); val >>= 2; /* avoid overflow in following multiply */ val = (val * invsqrt) >> (32 - 2 + 1); vars->rec_inv_sqrt = val >> REC_INV_SQRT_SHIFT; } /* * CoDel control_law is t + interval/sqrt(count) * We maintain in rec_inv_sqrt the reciprocal value of sqrt(count) to avoid * both sqrt() and divide operation. */ static codel_time_t codel_control_law(codel_time_t t, codel_time_t interval, u32 rec_inv_sqrt) { return t + reciprocal_scale(interval, rec_inv_sqrt << REC_INV_SQRT_SHIFT); } static bool codel_should_drop(const struct sk_buff *skb, void *ctx, struct codel_vars *vars, struct codel_params *params, struct codel_stats *stats, codel_skb_len_t skb_len_func, codel_skb_time_t skb_time_func, u32 *backlog, codel_time_t now) { bool ok_to_drop; u32 skb_len; if (!skb) { vars->first_above_time = 0; return false; } skb_len = skb_len_func(skb); vars->ldelay = now - skb_time_func(skb); if (unlikely(skb_len > stats->maxpacket)) stats->maxpacket = skb_len; if (codel_time_before(vars->ldelay, params->target) || *backlog <= params->mtu) { /* went below - stay below for at least interval */ vars->first_above_time = 0; return false; } ok_to_drop = false; if (vars->first_above_time == 0) { /* just went above from below. If we stay above * for at least interval we'll say it's ok to drop */ vars->first_above_time = now + params->interval; } else if (codel_time_after(now, vars->first_above_time)) { ok_to_drop = true; } return ok_to_drop; } static struct sk_buff *codel_dequeue(void *ctx, u32 *backlog, struct codel_params *params, struct codel_vars *vars, struct codel_stats *stats, codel_skb_len_t skb_len_func, codel_skb_time_t skb_time_func, codel_skb_drop_t drop_func, codel_skb_dequeue_t dequeue_func) { struct sk_buff *skb = dequeue_func(vars, ctx); codel_time_t now; bool drop; if (!skb) { vars->dropping = false; return skb; } now = codel_get_time(); drop = codel_should_drop(skb, ctx, vars, params, stats, skb_len_func, skb_time_func, backlog, now); if (vars->dropping) { if (!drop) { /* sojourn time below target - leave dropping state */ vars->dropping = false; } else if (codel_time_after_eq(now, vars->drop_next)) { /* It's time for the next drop. Drop the current * packet and dequeue the next. The dequeue might * take us out of dropping state. * If not, schedule the next drop. * A large backlog might result in drop rates so high * that the next drop should happen now, * hence the while loop. */ while (vars->dropping && codel_time_after_eq(now, vars->drop_next)) { vars->count++; /* dont care of possible wrap * since there is no more divide */ codel_Newton_step(vars); if (params->ecn && INET_ECN_set_ce(skb)) { stats->ecn_mark++; vars->drop_next = codel_control_law(vars->drop_next, params->interval, vars->rec_inv_sqrt); goto end; } stats->drop_len += skb_len_func(skb); drop_func(skb, ctx); stats->drop_count++; skb = dequeue_func(vars, ctx); if (!codel_should_drop(skb, ctx, vars, params, stats, skb_len_func, skb_time_func, backlog, now)) { /* leave dropping state */ vars->dropping = false; } else { /* and schedule the next drop */ vars->drop_next = codel_control_law(vars->drop_next, params->interval, vars->rec_inv_sqrt); } } } } else if (drop) { u32 delta; if (params->ecn && INET_ECN_set_ce(skb)) { stats->ecn_mark++; } else { stats->drop_len += skb_len_func(skb); drop_func(skb, ctx); stats->drop_count++; skb = dequeue_func(vars, ctx); drop = codel_should_drop(skb, ctx, vars, params, stats, skb_len_func, skb_time_func, backlog, now); } vars->dropping = true; /* if min went above target close to when we last went below it * assume that the drop rate that controlled the queue on the * last cycle is a good starting point to control it now. */ delta = vars->count - vars->lastcount; if (delta > 1 && codel_time_before(now - vars->drop_next, 16 * params->interval)) { vars->count = delta; /* we dont care if rec_inv_sqrt approximation * is not very precise : * Next Newton steps will correct it quadratically. */ codel_Newton_step(vars); } else { vars->count = 1; vars->rec_inv_sqrt = ~0U >> REC_INV_SQRT_SHIFT; } vars->lastcount = vars->count; vars->drop_next = codel_control_law(now, params->interval, vars->rec_inv_sqrt); } end: if (skb && codel_time_after(vars->ldelay, params->ce_threshold) && INET_ECN_set_ce(skb)) stats->ce_mark++; return skb; } #endif
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 /* SPDX-License-Identifier: GPL-2.0 */ #ifndef _LINUX_SIGNAL_H #define _LINUX_SIGNAL_H #include <linux/bug.h> #include <linux/signal_types.h> #include <linux/string.h> struct task_struct; /* for sysctl */ extern int print_fatal_signals; static inline void copy_siginfo(kernel_siginfo_t *to, const kernel_siginfo_t *from) { memcpy(to, from, sizeof(*to)); } static inline void clear_siginfo(kernel_siginfo_t *info) { memset(info, 0, sizeof(*info)); } #define SI_EXPANSION_SIZE (sizeof(struct siginfo) - sizeof(struct kernel_siginfo)) static inline void copy_siginfo_to_external(siginfo_t *to, const kernel_siginfo_t *from) { memcpy(to, from, sizeof(*from)); memset(((char *)to) + sizeof(struct kernel_siginfo), 0, SI_EXPANSION_SIZE); } int copy_siginfo_to_user(siginfo_t __user *to, const kernel_siginfo_t *from); int copy_siginfo_from_user(kernel_siginfo_t *to, const siginfo_t __user *from); enum siginfo_layout { SIL_KILL, SIL_TIMER, SIL_POLL, SIL_FAULT, SIL_FAULT_MCEERR, SIL_FAULT_BNDERR, SIL_FAULT_PKUERR, SIL_CHLD, SIL_RT, SIL_SYS, }; enum siginfo_layout siginfo_layout(unsigned sig, int si_code); /* * Define some primitives to manipulate sigset_t. */ #ifndef __HAVE_ARCH_SIG_BITOPS #include <linux/bitops.h> /* We don't use <linux/bitops.h> for these because there is no need to be atomic. */ static inline void sigaddset(sigset_t *set, int _sig) { unsigned long sig = _sig - 1; if (_NSIG_WORDS == 1) set->sig[0] |= 1UL << sig; else set->sig[sig / _NSIG_BPW] |= 1UL << (sig % _NSIG_BPW); } static inline void sigdelset(sigset_t *set, int _sig) { unsigned long sig = _sig - 1; if (_NSIG_WORDS == 1) set->sig[0] &= ~(1UL << sig); else set->sig[sig / _NSIG_BPW] &= ~(1UL << (sig % _NSIG_BPW)); } static inline int sigismember(sigset_t *set, int _sig) { unsigned long sig = _sig - 1; if (_NSIG_WORDS == 1) return 1 & (set->sig[0] >> sig); else return 1 & (set->sig[sig / _NSIG_BPW] >> (sig % _NSIG_BPW)); } #endif /* __HAVE_ARCH_SIG_BITOPS */ static inline int sigisemptyset(sigset_t *set) { switch (_NSIG_WORDS) { case 4: return (set->sig[3] | set->sig[2] | set->sig[1] | set->sig[0]) == 0; case 2: return (set->sig[1] | set->sig[0]) == 0; case 1: return set->sig[0] == 0; default: BUILD_BUG(); return 0; } } static inline int sigequalsets(const sigset_t *set1, const sigset_t *set2) { switch (_NSIG_WORDS) { case 4: return (set1->sig[3] == set2->sig[3]) && (set1->sig[2] == set2->sig[2]) && (set1->sig[1] == set2->sig[1]) && (set1->sig[0] == set2->sig[0]); case 2: return (set1->sig[1] == set2->sig[1]) && (set1->sig[0] == set2->sig[0]); case 1: return set1->sig[0] == set2->sig[0]; } return 0; } #define sigmask(sig) (1UL << ((sig) - 1)) #ifndef __HAVE_ARCH_SIG_SETOPS #include <linux/string.h> #define _SIG_SET_BINOP(name, op) \ static inline void name(sigset_t *r, const sigset_t *a, const sigset_t *b) \ { \ unsigned long a0, a1, a2, a3, b0, b1, b2, b3; \ \ switch (_NSIG_WORDS) { \ case 4: \ a3 = a->sig[3]; a2 = a->sig[2]; \ b3 = b->sig[3]; b2 = b->sig[2]; \ r->sig[3] = op(a3, b3); \ r->sig[2] = op(a2, b2); \ fallthrough; \ case 2: \ a1 = a->sig[1]; b1 = b->sig[1]; \ r->sig[1] = op(a1, b1); \ fallthrough; \ case 1: \ a0 = a->sig[0]; b0 = b->sig[0]; \ r->sig[0] = op(a0, b0); \ break; \ default: \ BUILD_BUG(); \ } \ } #define _sig_or(x,y) ((x) | (y)) _SIG_SET_BINOP(sigorsets, _sig_or) #define _sig_and(x,y) ((x) & (y)) _SIG_SET_BINOP(sigandsets, _sig_and) #define _sig_andn(x,y) ((x) & ~(y)) _SIG_SET_BINOP(sigandnsets, _sig_andn) #undef _SIG_SET_BINOP #undef _sig_or #undef _sig_and #undef _sig_andn #define _SIG_SET_OP(name, op) \ static inline void name(sigset_t *set) \ { \ switch (_NSIG_WORDS) { \ case 4: set->sig[3] = op(set->sig[3]); \ set->sig[2] = op(set->sig[2]); \ fallthrough; \ case 2: set->sig[1] = op(set->sig[1]); \ fallthrough; \ case 1: set->sig[0] = op(set->sig[0]); \ break; \ default: \ BUILD_BUG(); \ } \ } #define _sig_not(x) (~(x)) _SIG_SET_OP(signotset, _sig_not) #undef _SIG_SET_OP #undef _sig_not static inline void sigemptyset(sigset_t *set) { switch (_NSIG_WORDS) { default: memset(set, 0, sizeof(sigset_t)); break; case 2: set->sig[1] = 0; fallthrough; case 1: set->sig[0] = 0; break; } } static inline void sigfillset(sigset_t *set) { switch (_NSIG_WORDS) { default: memset(set, -1, sizeof(sigset_t)); break; case 2: set->sig[1] = -1; fallthrough; case 1: set->sig[0] = -1; break; } } /* Some extensions for manipulating the low 32 signals in particular. */ static inline void sigaddsetmask(sigset_t *set, unsigned long mask) { set->sig[0] |= mask; } static inline void sigdelsetmask(sigset_t *set, unsigned long mask) { set->sig[0] &= ~mask; } static inline int sigtestsetmask(sigset_t *set, unsigned long mask) { return (set->sig[0] & mask) != 0; } static inline void siginitset(sigset_t *set, unsigned long mask) { set->sig[0] = mask; switch (_NSIG_WORDS) { default: memset(&set->sig[1], 0, sizeof(long)*(_NSIG_WORDS-1)); break; case 2: set->sig[1] = 0; break; case 1: ; } } static inline void siginitsetinv(sigset_t *set, unsigned long mask) { set->sig[0] = ~mask; switch (_NSIG_WORDS) { default: memset(&set->sig[1], -1, sizeof(long)*(_NSIG_WORDS-1)); break; case 2: set->sig[1] = -1; break; case 1: ; } } #endif /* __HAVE_ARCH_SIG_SETOPS */ static inline void init_sigpending(struct sigpending *sig) { sigemptyset(&sig->signal); INIT_LIST_HEAD(&sig->list); } extern void flush_sigqueue(struct sigpending *queue); /* Test if 'sig' is valid signal. Use this instead of testing _NSIG directly */ static inline int valid_signal(unsigned long sig) { return sig <= _NSIG ? 1 : 0; } struct timespec; struct pt_regs; enum pid_type; extern int next_signal(struct sigpending *pending, sigset_t *mask); extern int do_send_sig_info(int sig, struct kernel_siginfo *info, struct task_struct *p, enum pid_type type); extern int group_send_sig_info(int sig, struct kernel_siginfo *info, struct task_struct *p, enum pid_type type); extern int __group_send_sig_info(int, struct kernel_siginfo *, struct task_struct *); extern int sigprocmask(int, sigset_t *, sigset_t *); extern void set_current_blocked(sigset_t *); extern void __set_current_blocked(const sigset_t *); extern int show_unhandled_signals; extern bool get_signal(struct ksignal *ksig); extern void signal_setup_done(int failed, struct ksignal *ksig, int stepping); extern void exit_signals(struct task_struct *tsk); extern void kernel_sigaction(int, __sighandler_t); #define SIG_KTHREAD ((__force __sighandler_t)2) #define SIG_KTHREAD_KERNEL ((__force __sighandler_t)3) static inline void allow_signal(int sig) { /* * Kernel threads handle their own signals. Let the signal code * know it'll be handled, so that they don't get converted to * SIGKILL or just silently dropped. */ kernel_sigaction(sig, SIG_KTHREAD); } static inline void allow_kernel_signal(int sig) { /* * Kernel threads handle their own signals. Let the signal code * know signals sent by the kernel will be handled, so that they * don't get silently dropped. */ kernel_sigaction(sig, SIG_KTHREAD_KERNEL); } static inline void disallow_signal(int sig) { kernel_sigaction(sig, SIG_IGN); } extern struct kmem_cache *sighand_cachep; extern bool unhandled_signal(struct task_struct *tsk, int sig); /* * In POSIX a signal is sent either to a specific thread (Linux task) * or to the process as a whole (Linux thread group). How the signal * is sent determines whether it's to one thread or the whole group, * which determines which signal mask(s) are involved in blocking it * from being delivered until later. When the signal is delivered, * either it's caught or ignored by a user handler or it has a default * effect that applies to the whole thread group (POSIX process). * * The possible effects an unblocked signal set to SIG_DFL can have are: * ignore - Nothing Happens * terminate - kill the process, i.e. all threads in the group, * similar to exit_group. The group leader (only) reports * WIFSIGNALED status to its parent. * coredump - write a core dump file describing all threads using * the same mm and then kill all those threads * stop - stop all the threads in the group, i.e. TASK_STOPPED state * * SIGKILL and SIGSTOP cannot be caught, blocked, or ignored. * Other signals when not blocked and set to SIG_DFL behaves as follows. * The job control signals also have other special effects. * * +--------------------+------------------+ * | POSIX signal | default action | * +--------------------+------------------+ * | SIGHUP | terminate | * | SIGINT | terminate | * | SIGQUIT | coredump | * | SIGILL | coredump | * | SIGTRAP | coredump | * | SIGABRT/SIGIOT | coredump | * | SIGBUS | coredump | * | SIGFPE | coredump | * | SIGKILL | terminate(+) | * | SIGUSR1 | terminate | * | SIGSEGV | coredump | * | SIGUSR2 | terminate | * | SIGPIPE | terminate | * | SIGALRM | terminate | * | SIGTERM | terminate | * | SIGCHLD | ignore | * | SIGCONT | ignore(*) | * | SIGSTOP | stop(*)(+) | * | SIGTSTP | stop(*) | * | SIGTTIN | stop(*) | * | SIGTTOU | stop(*) | * | SIGURG | ignore | * | SIGXCPU | coredump | * | SIGXFSZ | coredump | * | SIGVTALRM | terminate | * | SIGPROF | terminate | * | SIGPOLL/SIGIO | terminate | * | SIGSYS/SIGUNUSED | coredump | * | SIGSTKFLT | terminate | * | SIGWINCH | ignore | * | SIGPWR | terminate | * | SIGRTMIN-SIGRTMAX | terminate | * +--------------------+------------------+ * | non-POSIX signal | default action | * +--------------------+------------------+ * | SIGEMT | coredump | * +--------------------+------------------+ * * (+) For SIGKILL and SIGSTOP the action is "always", not just "default". * (*) Special job control effects: * When SIGCONT is sent, it resumes the process (all threads in the group) * from TASK_STOPPED state and also clears any pending/queued stop signals * (any of those marked with "stop(*)"). This happens regardless of blocking, * catching, or ignoring SIGCONT. When any stop signal is sent, it clears * any pending/queued SIGCONT signals; this happens regardless of blocking, * catching, or ignored the stop signal, though (except for SIGSTOP) the * default action of stopping the process may happen later or never. */ #ifdef SIGEMT #define SIGEMT_MASK rt_sigmask(SIGEMT) #else #define SIGEMT_MASK 0 #endif #if SIGRTMIN > BITS_PER_LONG #define rt_sigmask(sig) (1ULL << ((sig)-1)) #else #define rt_sigmask(sig) sigmask(sig) #endif #define siginmask(sig, mask) \ ((sig) > 0 && (sig) < SIGRTMIN && (rt_sigmask(sig) & (mask))) #define SIG_KERNEL_ONLY_MASK (\ rt_sigmask(SIGKILL) | rt_sigmask(SIGSTOP)) #define SIG_KERNEL_STOP_MASK (\ rt_sigmask(SIGSTOP) | rt_sigmask(SIGTSTP) | \ rt_sigmask(SIGTTIN) | rt_sigmask(SIGTTOU) ) #define SIG_KERNEL_COREDUMP_MASK (\ rt_sigmask(SIGQUIT) | rt_sigmask(SIGILL) | \ rt_sigmask(SIGTRAP) | rt_sigmask(SIGABRT) | \ rt_sigmask(SIGFPE) | rt_sigmask(SIGSEGV) | \ rt_sigmask(SIGBUS) | rt_sigmask(SIGSYS) | \ rt_sigmask(SIGXCPU) | rt_sigmask(SIGXFSZ) | \ SIGEMT_MASK ) #define SIG_KERNEL_IGNORE_MASK (\ rt_sigmask(SIGCONT) | rt_sigmask(SIGCHLD) | \ rt_sigmask(SIGWINCH) | rt_sigmask(SIGURG) ) #define SIG_SPECIFIC_SICODES_MASK (\ rt_sigmask(SIGILL) | rt_sigmask(SIGFPE) | \ rt_sigmask(SIGSEGV) | rt_sigmask(SIGBUS) | \ rt_sigmask(SIGTRAP) | rt_sigmask(SIGCHLD) | \ rt_sigmask(SIGPOLL) | rt_sigmask(SIGSYS) | \ SIGEMT_MASK ) #define sig_kernel_only(sig) siginmask(sig, SIG_KERNEL_ONLY_MASK) #define sig_kernel_coredump(sig) siginmask(sig, SIG_KERNEL_COREDUMP_MASK) #define sig_kernel_ignore(sig) siginmask(sig, SIG_KERNEL_IGNORE_MASK) #define sig_kernel_stop(sig) siginmask(sig, SIG_KERNEL_STOP_MASK) #define sig_specific_sicodes(sig) siginmask(sig, SIG_SPECIFIC_SICODES_MASK) #define sig_fatal(t, signr) \ (!siginmask(signr, SIG_KERNEL_IGNORE_MASK|SIG_KERNEL_STOP_MASK) && \ (t)->sighand->action[(signr)-1].sa.sa_handler == SIG_DFL) void signals_init(void); int restore_altstack(const stack_t __user *); int __save_altstack(stack_t __user *, unsigned long); #define unsafe_save_altstack(uss, sp, label) do { \ stack_t __user *__uss = uss; \ struct task_struct *t = current; \ unsafe_put_user((void __user *)t->sas_ss_sp, &__uss->ss_sp, label); \ unsafe_put_user(t->sas_ss_flags, &__uss->ss_flags, label); \ unsafe_put_user(t->sas_ss_size, &__uss->ss_size, label); \ if (t->sas_ss_flags & SS_AUTODISARM) \ sas_ss_reset(t); \ } while (0); #ifdef CONFIG_PROC_FS struct seq_file; extern void render_sigset_t(struct seq_file *, const char *, sigset_t *); #endif #endif /* _LINUX_SIGNAL_H */
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 /* SPDX-License-Identifier: GPL-2.0 */ #ifndef _LINUX_HUGETLB_H #define _LINUX_HUGETLB_H #include <linux/mm_types.h> #include <linux/mmdebug.h> #include <linux/fs.h> #include <linux/hugetlb_inline.h> #include <linux/cgroup.h> #include <linux/list.h> #include <linux/kref.h> #include <linux/pgtable.h> #include <linux/gfp.h> struct ctl_table; struct user_struct; struct mmu_gather; #ifndef is_hugepd typedef struct { unsigned long pd; } hugepd_t; #define is_hugepd(hugepd) (0) #define __hugepd(x) ((hugepd_t) { (x) }) #endif #ifdef CONFIG_HUGETLB_PAGE #include <linux/mempolicy.h> #include <linux/shm.h> #include <asm/tlbflush.h> struct hugepage_subpool { spinlock_t lock; long count; long max_hpages; /* Maximum huge pages or -1 if no maximum. */ long used_hpages; /* Used count against maximum, includes */ /* both alloced and reserved pages. */ struct hstate *hstate; long min_hpages; /* Minimum huge pages or -1 if no minimum. */ long rsv_hpages; /* Pages reserved against global pool to */ /* sasitfy minimum size. */ }; struct resv_map { struct kref refs; spinlock_t lock; struct list_head regions; long adds_in_progress; struct list_head region_cache; long region_cache_count; #ifdef CONFIG_CGROUP_HUGETLB /* * On private mappings, the counter to uncharge reservations is stored * here. If these fields are 0, then either the mapping is shared, or * cgroup accounting is disabled for this resv_map. */ struct page_counter *reservation_counter; unsigned long pages_per_hpage; struct cgroup_subsys_state *css; #endif }; /* * Region tracking -- allows tracking of reservations and instantiated pages * across the pages in a mapping. * * The region data structures are embedded into a resv_map and protected * by a resv_map's lock. The set of regions within the resv_map represent * reservations for huge pages, or huge pages that have already been * instantiated within the map. The from and to elements are huge page * indicies into the associated mapping. from indicates the starting index * of the region. to represents the first index past the end of the region. * * For example, a file region structure with from == 0 and to == 4 represents * four huge pages in a mapping. It is important to note that the to element * represents the first element past the end of the region. This is used in * arithmetic as 4(to) - 0(from) = 4 huge pages in the region. * * Interval notation of the form [from, to) will be used to indicate that * the endpoint from is inclusive and to is exclusive. */ struct file_region { struct list_head link; long from; long to; #ifdef CONFIG_CGROUP_HUGETLB /* * On shared mappings, each reserved region appears as a struct * file_region in resv_map. These fields hold the info needed to * uncharge each reservation. */ struct page_counter *reservation_counter; struct cgroup_subsys_state *css; #endif }; extern struct resv_map *resv_map_alloc(void); void resv_map_release(struct kref *ref); extern spinlock_t hugetlb_lock; extern int hugetlb_max_hstate __read_mostly; #define for_each_hstate(h) \ for ((h) = hstates; (h) < &hstates[hugetlb_max_hstate]; (h)++) struct hugepage_subpool *hugepage_new_subpool(struct hstate *h, long max_hpages, long min_hpages); void hugepage_put_subpool(struct hugepage_subpool *spool); void reset_vma_resv_huge_pages(struct vm_area_struct *vma); int hugetlb_sysctl_handler(struct ctl_table *, int, void *, size_t *, loff_t *); int hugetlb_overcommit_handler(struct ctl_table *, int, void *, size_t *, loff_t *); int hugetlb_treat_movable_handler(struct ctl_table *, int, void *, size_t *, loff_t *); int hugetlb_mempolicy_sysctl_handler(struct ctl_table *, int, void *, size_t *, loff_t *); int copy_hugetlb_page_range(struct mm_struct *, struct mm_struct *, struct vm_area_struct *); long follow_hugetlb_page(struct mm_struct *, struct vm_area_struct *, struct page **, struct vm_area_struct **, unsigned long *, unsigned long *, long, unsigned int, int *); void unmap_hugepage_range(struct vm_area_struct *, unsigned long, unsigned long, struct page *); void __unmap_hugepage_range_final(struct mmu_gather *tlb, struct vm_area_struct *vma, unsigned long start, unsigned long end, struct page *ref_page); void __unmap_hugepage_range(struct mmu_gather *tlb, struct vm_area_struct *vma, unsigned long start, unsigned long end, struct page *ref_page); void hugetlb_report_meminfo(struct seq_file *); int hugetlb_report_node_meminfo(char *buf, int len, int nid); void hugetlb_show_meminfo(void); unsigned long hugetlb_total_pages(void); vm_fault_t hugetlb_fault(struct mm_struct *mm, struct vm_area_struct *vma, unsigned long address, unsigned int flags); int hugetlb_mcopy_atomic_pte(struct mm_struct *dst_mm, pte_t *dst_pte, struct vm_area_struct *dst_vma, unsigned long dst_addr, unsigned long src_addr, struct page **pagep); int hugetlb_reserve_pages(struct inode *inode, long from, long to, struct vm_area_struct *vma, vm_flags_t vm_flags); long hugetlb_unreserve_pages(struct inode *inode, long start, long end, long freed); bool isolate_huge_page(struct page *page, struct list_head *list); void putback_active_hugepage(struct page *page); void move_hugetlb_state(struct page *oldpage, struct page *newpage, int reason); void free_huge_page(struct page *page); void hugetlb_fix_reserve_counts(struct inode *inode); extern struct mutex *hugetlb_fault_mutex_table; u32 hugetlb_fault_mutex_hash(struct address_space *mapping, pgoff_t idx); pte_t *huge_pmd_share(struct mm_struct *mm, unsigned long addr, pud_t *pud); struct address_space *hugetlb_page_mapping_lock_write(struct page *hpage); extern int sysctl_hugetlb_shm_group; extern struct list_head huge_boot_pages; /* arch callbacks */ pte_t *huge_pte_alloc(struct mm_struct *mm, unsigned long addr, unsigned long sz); pte_t *huge_pte_offset(struct mm_struct *mm, unsigned long addr, unsigned long sz); int huge_pmd_unshare(struct mm_struct *mm, struct vm_area_struct *vma, unsigned long *addr, pte_t *ptep); void adjust_range_if_pmd_sharing_possible(struct vm_area_struct *vma, unsigned long *start, unsigned long *end); struct page *follow_huge_addr(struct mm_struct *mm, unsigned long address, int write); struct page *follow_huge_pd(struct vm_area_struct *vma, unsigned long address, hugepd_t hpd, int flags, int pdshift); struct page *follow_huge_pmd(struct mm_struct *mm, unsigned long address, pmd_t *pmd, int flags); struct page *follow_huge_pud(struct mm_struct *mm, unsigned long address, pud_t *pud, int flags); struct page *follow_huge_pgd(struct mm_struct *mm, unsigned long address, pgd_t *pgd, int flags); int pmd_huge(pmd_t pmd); int pud_huge(pud_t pud); unsigned long hugetlb_change_protection(struct vm_area_struct *vma, unsigned long address, unsigned long end, pgprot_t newprot); bool is_hugetlb_entry_migration(pte_t pte); #else /* !CONFIG_HUGETLB_PAGE */ static inline void reset_vma_resv_huge_pages(struct vm_area_struct *vma) { } static inline unsigned long hugetlb_total_pages(void) { return 0; } static inline struct address_space *hugetlb_page_mapping_lock_write( struct page *hpage) { return NULL; } static inline int huge_pmd_unshare(struct mm_struct *mm, struct vm_area_struct *vma, unsigned long *addr, pte_t *ptep) { return 0; } static inline void adjust_range_if_pmd_sharing_possible( struct vm_area_struct *vma, unsigned long *start, unsigned long *end) { } static inline long follow_hugetlb_page(struct mm_struct *mm, struct vm_area_struct *vma, struct page **pages, struct vm_area_struct **vmas, unsigned long *position, unsigned long *nr_pages, long i, unsigned int flags, int *nonblocking) { BUG(); return 0; } static inline struct page *follow_huge_addr(struct mm_struct *mm, unsigned long address, int write) { return ERR_PTR(-EINVAL); } static inline int copy_hugetlb_page_range(struct mm_struct *dst, struct mm_struct *src, struct vm_area_struct *vma) { BUG(); return 0; } static inline void hugetlb_report_meminfo(struct seq_file *m) { } static inline int hugetlb_report_node_meminfo(char *buf, int len, int nid) { return 0; } static inline void hugetlb_show_meminfo(void) { } static inline struct page *follow_huge_pd(struct vm_area_struct *vma, unsigned long address, hugepd_t hpd, int flags, int pdshift) { return NULL; } static inline struct page *follow_huge_pmd(struct mm_struct *mm, unsigned long address, pmd_t *pmd, int flags) { return NULL; } static inline struct page *follow_huge_pud(struct mm_struct *mm, unsigned long address, pud_t *pud, int flags) { return NULL; } static inline struct page *follow_huge_pgd(struct mm_struct *mm, unsigned long address, pgd_t *pgd, int flags) { return NULL; } static inline int prepare_hugepage_range(struct file *file, unsigned long addr, unsigned long len) { return -EINVAL; } static inline int pmd_huge(pmd_t pmd) { return 0; } static inline int pud_huge(pud_t pud) { return 0; } static inline int is_hugepage_only_range(struct mm_struct *mm, unsigned long addr, unsigned long len) { return 0; } static inline void hugetlb_free_pgd_range(struct mmu_gather *tlb, unsigned long addr, unsigned long end, unsigned long floor, unsigned long ceiling) { BUG(); } static inline int hugetlb_mcopy_atomic_pte(struct mm_struct *dst_mm, pte_t *dst_pte, struct vm_area_struct *dst_vma, unsigned long dst_addr, unsigned long src_addr, struct page **pagep) { BUG(); return 0; } static inline pte_t *huge_pte_offset(struct mm_struct *mm, unsigned long addr, unsigned long sz) { return NULL; } static inline bool isolate_huge_page(struct page *page, struct list_head *list) { return false; } static inline void putback_active_hugepage(struct page *page) { } static inline void move_hugetlb_state(struct page *oldpage, struct page *newpage, int reason) { } static inline unsigned long hugetlb_change_protection( struct vm_area_struct *vma, unsigned long address, unsigned long end, pgprot_t newprot) { return 0; } static inline void __unmap_hugepage_range_final(struct mmu_gather *tlb, struct vm_area_struct *vma, unsigned long start, unsigned long end, struct page *ref_page) { BUG(); } static inline void __unmap_hugepage_range(struct mmu_gather *tlb, struct vm_area_struct *vma, unsigned long start, unsigned long end, struct page *ref_page) { BUG(); } static inline vm_fault_t hugetlb_fault(struct mm_struct *mm, struct vm_area_struct *vma, unsigned long address, unsigned int flags) { BUG(); return 0; } #endif /* !CONFIG_HUGETLB_PAGE */ /* * hugepages at page global directory. If arch support * hugepages at pgd level, they need to define this. */ #ifndef pgd_huge #define pgd_huge(x) 0 #endif #ifndef p4d_huge #define p4d_huge(x) 0 #endif #ifndef pgd_write static inline int pgd_write(pgd_t pgd) { BUG(); return 0; } #endif #define HUGETLB_ANON_FILE "anon_hugepage" enum { /* * The file will be used as an shm file so shmfs accounting rules * apply */ HUGETLB_SHMFS_INODE = 1, /* * The file is being created on the internal vfs mount and shmfs * accounting rules do not apply */ HUGETLB_ANONHUGE_INODE = 2, }; #ifdef CONFIG_HUGETLBFS struct hugetlbfs_sb_info { long max_inodes; /* inodes allowed */ long free_inodes; /* inodes free */ spinlock_t stat_lock; struct hstate *hstate; struct hugepage_subpool *spool; kuid_t uid; kgid_t gid; umode_t mode; }; static inline struct hugetlbfs_sb_info *HUGETLBFS_SB(struct super_block *sb) { return sb->s_fs_info; } struct hugetlbfs_inode_info { struct shared_policy policy; struct inode vfs_inode; unsigned int seals; }; static inline struct hugetlbfs_inode_info *HUGETLBFS_I(struct inode *inode) { return container_of(inode, struct hugetlbfs_inode_info, vfs_inode); } extern const struct file_operations hugetlbfs_file_operations; extern const struct vm_operations_struct hugetlb_vm_ops; struct file *hugetlb_file_setup(const char *name, size_t size, vm_flags_t acct, struct user_struct **user, int creat_flags, int page_size_log); static inline bool is_file_hugepages(struct file *file) { if (file->f_op == &hugetlbfs_file_operations) return true; return is_file_shm_hugepages(file); } static inline struct hstate *hstate_inode(struct inode *i) { return HUGETLBFS_SB(i->i_sb)->hstate; } #else /* !CONFIG_HUGETLBFS */ #define is_file_hugepages(file) false static inline struct file * hugetlb_file_setup(const char *name, size_t size, vm_flags_t acctflag, struct user_struct **user, int creat_flags, int page_size_log) { return ERR_PTR(-ENOSYS); } static inline struct hstate *hstate_inode(struct inode *i) { return NULL; } #endif /* !CONFIG_HUGETLBFS */ #ifdef HAVE_ARCH_HUGETLB_UNMAPPED_AREA unsigned long hugetlb_get_unmapped_area(struct file *file, unsigned long addr, unsigned long len, unsigned long pgoff, unsigned long flags); #endif /* HAVE_ARCH_HUGETLB_UNMAPPED_AREA */ #ifdef CONFIG_HUGETLB_PAGE #define HSTATE_NAME_LEN 32 /* Defines one hugetlb page size */ struct hstate { int next_nid_to_alloc; int next_nid_to_free; unsigned int order; unsigned long mask; unsigned long max_huge_pages; unsigned long nr_huge_pages; unsigned long free_huge_pages; unsigned long resv_huge_pages; unsigned long surplus_huge_pages; unsigned long nr_overcommit_huge_pages; struct list_head hugepage_activelist; struct list_head hugepage_freelists[MAX_NUMNODES]; unsigned int nr_huge_pages_node[MAX_NUMNODES]; unsigned int free_huge_pages_node[MAX_NUMNODES]; unsigned int surplus_huge_pages_node[MAX_NUMNODES]; #ifdef CONFIG_CGROUP_HUGETLB /* cgroup control files */ struct cftype cgroup_files_dfl[7]; struct cftype cgroup_files_legacy[9]; #endif char name[HSTATE_NAME_LEN]; }; struct huge_bootmem_page { struct list_head list; struct hstate *hstate; }; struct page *alloc_huge_page(struct vm_area_struct *vma, unsigned long addr, int avoid_reserve); struct page *alloc_huge_page_nodemask(struct hstate *h, int preferred_nid, nodemask_t *nmask, gfp_t gfp_mask); struct page *alloc_huge_page_vma(struct hstate *h, struct vm_area_struct *vma, unsigned long address); int huge_add_to_page_cache(struct page *page, struct address_space *mapping, pgoff_t idx); /* arch callback */ int __init __alloc_bootmem_huge_page(struct hstate *h); int __init alloc_bootmem_huge_page(struct hstate *h); void __init hugetlb_add_hstate(unsigned order); bool __init arch_hugetlb_valid_size(unsigned long size); struct hstate *size_to_hstate(unsigned long size); #ifndef HUGE_MAX_HSTATE #define HUGE_MAX_HSTATE 1 #endif extern struct hstate hstates[HUGE_MAX_HSTATE]; extern unsigned int default_hstate_idx; #define default_hstate (hstates[default_hstate_idx]) static inline struct hstate *hstate_file(struct file *f) { return hstate_inode(file_inode(f)); } static inline struct hstate *hstate_sizelog(int page_size_log) { if (!page_size_log) return &default_hstate; return size_to_hstate(1UL << page_size_log); } static inline struct hstate *hstate_vma(struct vm_area_struct *vma) { return hstate_file(vma->vm_file); } static inline unsigned long huge_page_size(struct hstate *h) { return (unsigned long)PAGE_SIZE << h->order; } extern unsigned long vma_kernel_pagesize(struct vm_area_struct *vma); extern unsigned long vma_mmu_pagesize(struct vm_area_struct *vma); static inline unsigned long huge_page_mask(struct hstate *h) { return h->mask; } static inline unsigned int huge_page_order(struct hstate *h) { return h->order; } static inline unsigned huge_page_shift(struct hstate *h) { return h->order + PAGE_SHIFT; } static inline bool hstate_is_gigantic(struct hstate *h) { return huge_page_order(h) >= MAX_ORDER; } static inline unsigned int pages_per_huge_page(struct hstate *h) { return 1 << h->order; } static inline unsigned int blocks_per_huge_page(struct hstate *h) { return huge_page_size(h) / 512; } #include <asm/hugetlb.h> #ifndef is_hugepage_only_range static inline int is_hugepage_only_range(struct mm_struct *mm, unsigned long addr, unsigned long len) { return 0; } #define is_hugepage_only_range is_hugepage_only_range #endif #ifndef arch_clear_hugepage_flags static inline void arch_clear_hugepage_flags(struct page *page) { } #define arch_clear_hugepage_flags arch_clear_hugepage_flags #endif #ifndef arch_make_huge_pte static inline pte_t arch_make_huge_pte(pte_t entry, struct vm_area_struct *vma, struct page *page, int writable) { return entry; } #endif static inline struct hstate *page_hstate(struct page *page) { VM_BUG_ON_PAGE(!PageHuge(page), page); return size_to_hstate(page_size(page)); } static inline unsigned hstate_index_to_shift(unsigned index) { return hstates[index].order + PAGE_SHIFT; } static inline int hstate_index(struct hstate *h) { return h - hstates; } extern int dissolve_free_huge_page(struct page *page); extern int dissolve_free_huge_pages(unsigned long start_pfn, unsigned long end_pfn); #ifdef CONFIG_ARCH_ENABLE_HUGEPAGE_MIGRATION #ifndef arch_hugetlb_migration_supported static inline bool arch_hugetlb_migration_supported(struct hstate *h) { if ((huge_page_shift(h) == PMD_SHIFT) || (huge_page_shift(h) == PUD_SHIFT) || (huge_page_shift(h) == PGDIR_SHIFT)) return true; else return false; } #endif #else static inline bool arch_hugetlb_migration_supported(struct hstate *h) { return false; } #endif static inline bool hugepage_migration_supported(struct hstate *h) { return arch_hugetlb_migration_supported(h); } /* * Movability check is different as compared to migration check. * It determines whether or not a huge page should be placed on * movable zone or not. Movability of any huge page should be * required only if huge page size is supported for migration. * There wont be any reason for the huge page to be movable if * it is not migratable to start with. Also the size of the huge * page should be large enough to be placed under a movable zone * and still feasible enough to be migratable. Just the presence * in movable zone does not make the migration feasible. * * So even though large huge page sizes like the gigantic ones * are migratable they should not be movable because its not * feasible to migrate them from movable zone. */ static inline bool hugepage_movable_supported(struct hstate *h) { if (!hugepage_migration_supported(h)) return false; if (hstate_is_gigantic(h)) return false; return true; } /* Movability of hugepages depends on migration support. */ static inline gfp_t htlb_alloc_mask(struct hstate *h) { if (hugepage_movable_supported(h)) return GFP_HIGHUSER_MOVABLE; else return GFP_HIGHUSER; } static inline gfp_t htlb_modify_alloc_mask(struct hstate *h, gfp_t gfp_mask) { gfp_t modified_mask = htlb_alloc_mask(h); /* Some callers might want to enforce node */ modified_mask |= (gfp_mask & __GFP_THISNODE); modified_mask |= (gfp_mask & __GFP_NOWARN); return modified_mask; } static inline spinlock_t *huge_pte_lockptr(struct hstate *h, struct mm_struct *mm, pte_t *pte) { if (huge_page_size(h) == PMD_SIZE) return pmd_lockptr(mm, (pmd_t *) pte); VM_BUG_ON(huge_page_size(h) == PAGE_SIZE); return &mm->page_table_lock; } #ifndef hugepages_supported /* * Some platform decide whether they support huge pages at boot * time. Some of them, such as powerpc, set HPAGE_SHIFT to 0 * when there is no such support */ #define hugepages_supported() (HPAGE_SHIFT != 0) #endif void hugetlb_report_usage(struct seq_file *m, struct mm_struct *mm); static inline void hugetlb_count_init(struct mm_struct *mm) { atomic_long_set(&mm->hugetlb_usage, 0); } static inline void hugetlb_count_add(long l, struct mm_struct *mm) { atomic_long_add(l, &mm->hugetlb_usage); } static inline void hugetlb_count_sub(long l, struct mm_struct *mm) { atomic_long_sub(l, &mm->hugetlb_usage); } #ifndef set_huge_swap_pte_at static inline void set_huge_swap_pte_at(struct mm_struct *mm, unsigned long addr, pte_t *ptep, pte_t pte, unsigned long sz) { set_huge_pte_at(mm, addr, ptep, pte); } #endif #ifndef huge_ptep_modify_prot_start #define huge_ptep_modify_prot_start huge_ptep_modify_prot_start static inline pte_t huge_ptep_modify_prot_start(struct vm_area_struct *vma, unsigned long addr, pte_t *ptep) { return huge_ptep_get_and_clear(vma->vm_mm, addr, ptep); } #endif #ifndef huge_ptep_modify_prot_commit #define huge_ptep_modify_prot_commit huge_ptep_modify_prot_commit static inline void huge_ptep_modify_prot_commit(struct vm_area_struct *vma, unsigned long addr, pte_t *ptep, pte_t old_pte, pte_t pte) { set_huge_pte_at(vma->vm_mm, addr, ptep, pte); } #endif void set_page_huge_active(struct page *page); #else /* CONFIG_HUGETLB_PAGE */ struct hstate {}; static inline struct page *alloc_huge_page(struct vm_area_struct *vma, unsigned long addr, int avoid_reserve) { return NULL; } static inline struct page * alloc_huge_page_nodemask(struct hstate *h, int preferred_nid, nodemask_t *nmask, gfp_t gfp_mask) { return NULL; } static inline struct page *alloc_huge_page_vma(struct hstate *h, struct vm_area_struct *vma, unsigned long address) { return NULL; } static inline int __alloc_bootmem_huge_page(struct hstate *h) { return 0; } static inline struct hstate *hstate_file(struct file *f) { return NULL; } static inline struct hstate *hstate_sizelog(int page_size_log) { return NULL; } static inline struct hstate *hstate_vma(struct vm_area_struct *vma) { return NULL; } static inline struct hstate *page_hstate(struct page *page) { return NULL; } static inline unsigned long huge_page_size(struct hstate *h) { return PAGE_SIZE; } static inline unsigned long huge_page_mask(struct hstate *h) { return PAGE_MASK; } static inline unsigned long vma_kernel_pagesize(struct vm_area_struct *vma) { return PAGE_SIZE; } static inline unsigned long vma_mmu_pagesize(struct vm_area_struct *vma) { return PAGE_SIZE; } static inline unsigned int huge_page_order(struct hstate *h) { return 0; } static inline unsigned int huge_page_shift(struct hstate *h) { return PAGE_SHIFT; } static inline bool hstate_is_gigantic(struct hstate *h) { return false; } static inline unsigned int pages_per_huge_page(struct hstate *h) { return 1; } static inline unsigned hstate_index_to_shift(unsigned index) { return 0; } static inline int hstate_index(struct hstate *h) { return 0; } static inline int dissolve_free_huge_page(struct page *page) { return 0; } static inline int dissolve_free_huge_pages(unsigned long start_pfn, unsigned long end_pfn) { return 0; } static inline bool hugepage_migration_supported(struct hstate *h) { return false; } static inline bool hugepage_movable_supported(struct hstate *h) { return false; } static inline gfp_t htlb_alloc_mask(struct hstate *h) { return 0; } static inline gfp_t htlb_modify_alloc_mask(struct hstate *h, gfp_t gfp_mask) { return 0; } static inline spinlock_t *huge_pte_lockptr(struct hstate *h, struct mm_struct *mm, pte_t *pte) { return &mm->page_table_lock; } static inline void hugetlb_count_init(struct mm_struct *mm) { } static inline void hugetlb_report_usage(struct seq_file *f, struct mm_struct *m) { } static inline void hugetlb_count_sub(long l, struct mm_struct *mm) { } static inline void set_huge_swap_pte_at(struct mm_struct *mm, unsigned long addr, pte_t *ptep, pte_t pte, unsigned long sz) { } #endif /* CONFIG_HUGETLB_PAGE */ static inline spinlock_t *huge_pte_lock(struct hstate *h, struct mm_struct *mm, pte_t *pte) { spinlock_t *ptl; ptl = huge_pte_lockptr(h, mm, pte); spin_lock(ptl); return ptl; } #if defined(CONFIG_HUGETLB_PAGE) && defined(CONFIG_CMA) extern void __init hugetlb_cma_reserve(int order); extern void __init hugetlb_cma_check(void); #else static inline __init void hugetlb_cma_reserve(int order) { } static inline __init void hugetlb_cma_check(void) { } #endif #endif /* _LINUX_HUGETLB_H */
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 /* SPDX-License-Identifier: GPL-2.0 */ /* * memory buffer pool support */ #ifndef _LINUX_MEMPOOL_H #define _LINUX_MEMPOOL_H #include <linux/wait.h> #include <linux/compiler.h> struct kmem_cache; typedef void * (mempool_alloc_t)(gfp_t gfp_mask, void *pool_data); typedef void (mempool_free_t)(void *element, void *pool_data); typedef struct mempool_s { spinlock_t lock; int min_nr; /* nr of elements at *elements */ int curr_nr; /* Current nr of elements at *elements */ void **elements; void *pool_data; mempool_alloc_t *alloc; mempool_free_t *free; wait_queue_head_t wait; } mempool_t; static inline bool mempool_initialized(mempool_t *pool) { return pool->elements != NULL; } void mempool_exit(mempool_t *pool); int mempool_init_node(mempool_t *pool, int min_nr, mempool_alloc_t *alloc_fn, mempool_free_t *free_fn, void *pool_data, gfp_t gfp_mask, int node_id); int mempool_init(mempool_t *pool, int min_nr, mempool_alloc_t *alloc_fn, mempool_free_t *free_fn, void *pool_data); extern mempool_t *mempool_create(int min_nr, mempool_alloc_t *alloc_fn, mempool_free_t *free_fn, void *pool_data); extern mempool_t *mempool_create_node(int min_nr, mempool_alloc_t *alloc_fn, mempool_free_t *free_fn, void *pool_data, gfp_t gfp_mask, int nid); extern int mempool_resize(mempool_t *pool, int new_min_nr); extern void mempool_destroy(mempool_t *pool); extern void *mempool_alloc(mempool_t *pool, gfp_t gfp_mask) __malloc; extern void mempool_free(void *element, mempool_t *pool); /* * A mempool_alloc_t and mempool_free_t that get the memory from * a slab cache that is passed in through pool_data. * Note: the slab cache may not have a ctor function. */ void *mempool_alloc_slab(gfp_t gfp_mask, void *pool_data); void mempool_free_slab(void *element, void *pool_data); static inline int mempool_init_slab_pool(mempool_t *pool, int min_nr, struct kmem_cache *kc) { return mempool_init(pool, min_nr, mempool_alloc_slab, mempool_free_slab, (void *) kc); } static inline mempool_t * mempool_create_slab_pool(int min_nr, struct kmem_cache *kc) { return mempool_create(min_nr, mempool_alloc_slab, mempool_free_slab, (void *) kc); } /* * a mempool_alloc_t and a mempool_free_t to kmalloc and kfree the * amount of memory specified by pool_data */ void *mempool_kmalloc(gfp_t gfp_mask, void *pool_data); void mempool_kfree(void *element, void *pool_data); static inline int mempool_init_kmalloc_pool(mempool_t *pool, int min_nr, size_t size) { return mempool_init(pool, min_nr, mempool_kmalloc, mempool_kfree, (void *) size); } static inline mempool_t *mempool_create_kmalloc_pool(int min_nr, size_t size) { return mempool_create(min_nr, mempool_kmalloc, mempool_kfree, (void *) size); } /* * A mempool_alloc_t and mempool_free_t for a simple page allocator that * allocates pages of the order specified by pool_data */ void *mempool_alloc_pages(gfp_t gfp_mask, void *pool_data); void mempool_free_pages(void *element, void *pool_data); static inline int mempool_init_page_pool(mempool_t *pool, int min_nr, int order) { return mempool_init(pool, min_nr, mempool_alloc_pages, mempool_free_pages, (void *)(long)order); } static inline mempool_t *mempool_create_page_pool(int min_nr, int order) { return mempool_create(min_nr, mempool_alloc_pages, mempool_free_pages, (void *)(long)order); } #endif /* _LINUX_MEMPOOL_H */
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 /* SPDX-License-Identifier: GPL-2.0 */ #ifndef _LINUX_NAMEI_H #define _LINUX_NAMEI_H #include <linux/fs.h> #include <linux/kernel.h> #include <linux/path.h> #include <linux/fcntl.h> #include <linux/errno.h> enum { MAX_NESTED_LINKS = 8 }; #define MAXSYMLINKS 40 /* * Type of the last component on LOOKUP_PARENT */ enum {LAST_NORM, LAST_ROOT, LAST_DOT, LAST_DOTDOT}; /* pathwalk mode */ #define LOOKUP_FOLLOW 0x0001 /* follow links at the end */ #define LOOKUP_DIRECTORY 0x0002 /* require a directory */ #define LOOKUP_AUTOMOUNT 0x0004 /* force terminal automount */ #define LOOKUP_EMPTY 0x4000 /* accept empty path [user_... only] */ #define LOOKUP_DOWN 0x8000 /* follow mounts in the starting point */ #define LOOKUP_MOUNTPOINT 0x0080 /* follow mounts in the end */ #define LOOKUP_REVAL 0x0020 /* tell ->d_revalidate() to trust no cache */ #define LOOKUP_RCU 0x0040 /* RCU pathwalk mode; semi-internal */ /* These tell filesystem methods that we are dealing with the final component... */ #define LOOKUP_OPEN 0x0100 /* ... in open */ #define LOOKUP_CREATE 0x0200 /* ... in object creation */ #define LOOKUP_EXCL 0x0400 /* ... in exclusive creation */ #define LOOKUP_RENAME_TARGET 0x0800 /* ... in destination of rename() */ /* internal use only */ #define LOOKUP_PARENT 0x0010 #define LOOKUP_JUMPED 0x1000 #define LOOKUP_ROOT 0x2000 #define LOOKUP_ROOT_GRABBED 0x0008 /* Scoping flags for lookup. */ #define LOOKUP_NO_SYMLINKS 0x010000 /* No symlink crossing. */ #define LOOKUP_NO_MAGICLINKS 0x020000 /* No nd_jump_link() crossing. */ #define LOOKUP_NO_XDEV 0x040000 /* No mountpoint crossing. */ #define LOOKUP_BENEATH 0x080000 /* No escaping from starting point. */ #define LOOKUP_IN_ROOT 0x100000 /* Treat dirfd as fs root. */ /* LOOKUP_* flags which do scope-related checks based on the dirfd. */ #define LOOKUP_IS_SCOPED (LOOKUP_BENEATH | LOOKUP_IN_ROOT) extern int path_pts(struct path *path); extern int user_path_at_empty(int, const char __user *, unsigned, struct path *, int *empty); static inline int user_path_at(int dfd, const char __user *name, unsigned flags, struct path *path) { return user_path_at_empty(dfd, name, flags, path, NULL); } extern int kern_path(const char *, unsigned, struct path *); extern struct dentry *kern_path_create(int, const char *, struct path *, unsigned int); extern struct dentry *user_path_create(int, const char __user *, struct path *, unsigned int); extern void done_path_create(struct path *, struct dentry *); extern struct dentry *kern_path_locked(const char *, struct path *); extern struct dentry *try_lookup_one_len(const char *, struct dentry *, int); extern struct dentry *lookup_one_len(const char *, struct dentry *, int); extern struct dentry *lookup_one_len_unlocked(const char *, struct dentry *, int); extern struct dentry *lookup_positive_unlocked(const char *, struct dentry *, int); extern int follow_down_one(struct path *); extern int follow_down(struct path *); extern int follow_up(struct path *); extern struct dentry *lock_rename(struct dentry *, struct dentry *); extern void unlock_rename(struct dentry *, struct dentry *); extern int __must_check nd_jump_link(struct path *path); static inline void nd_terminate_link(void *name, size_t len, size_t maxlen) { ((char *) name)[min(len, maxlen)] = '\0'; } /** * retry_estale - determine whether the caller should retry an operation * @error: the error that would currently be returned * @flags: flags being used for next lookup attempt * * Check to see if the error code was -ESTALE, and then determine whether * to retry the call based on whether "flags" already has LOOKUP_REVAL set. * * Returns true if t