1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 /* SPDX-License-Identifier: GPL-2.0 */ /* Based on net/wireless/trace.h */ #undef TRACE_SYSTEM #define TRACE_SYSTEM cfg802154 #if !defined(__RDEV_CFG802154_OPS_TRACE) || defined(TRACE_HEADER_MULTI_READ) #define __RDEV_CFG802154_OPS_TRACE #include <linux/tracepoint.h> #include <net/cfg802154.h> #define MAXNAME 32 #define WPAN_PHY_ENTRY __array(char, wpan_phy_name, MAXNAME) #define WPAN_PHY_ASSIGN strlcpy(__entry->wpan_phy_name, \ wpan_phy_name(wpan_phy), \ MAXNAME) #define WPAN_PHY_PR_FMT "%s" #define WPAN_PHY_PR_ARG __entry->wpan_phy_name #define WPAN_DEV_ENTRY __field(u32, identifier) #define WPAN_DEV_ASSIGN (__entry->identifier) = (!IS_ERR_OR_NULL(wpan_dev) \ ? wpan_dev->identifier : 0) #define WPAN_DEV_PR_FMT "wpan_dev(%u)" #define WPAN_DEV_PR_ARG (__entry->identifier) #define WPAN_CCA_ENTRY __field(enum nl802154_cca_modes, cca_mode) \ __field(enum nl802154_cca_opts, cca_opt) #define WPAN_CCA_ASSIGN \ do { \ (__entry->cca_mode) = cca->mode; \ (__entry->cca_opt) = cca->opt; \ } while (0) #define WPAN_CCA_PR_FMT "cca_mode: %d, cca_opt: %d" #define WPAN_CCA_PR_ARG __entry->cca_mode, __entry->cca_opt #define BOOL_TO_STR(bo) (bo) ? "true" : "false" /************************************************************* * rdev->ops traces * *************************************************************/ DECLARE_EVENT_CLASS(wpan_phy_only_evt, TP_PROTO(struct wpan_phy *wpan_phy), TP_ARGS(wpan_phy), TP_STRUCT__entry( WPAN_PHY_ENTRY ), TP_fast_assign( WPAN_PHY_ASSIGN; ), TP_printk(WPAN_PHY_PR_FMT, WPAN_PHY_PR_ARG) ); DEFINE_EVENT(wpan_phy_only_evt, 802154_rdev_suspend, TP_PROTO(struct wpan_phy *wpan_phy), TP_ARGS(wpan_phy) ); DEFINE_EVENT(wpan_phy_only_evt, 802154_rdev_resume, TP_PROTO(struct wpan_phy *wpan_phy), TP_ARGS(wpan_phy) ); TRACE_EVENT(802154_rdev_add_virtual_intf, TP_PROTO(struct wpan_phy *wpan_phy, char *name, enum nl802154_iftype type, __le64 extended_addr), TP_ARGS(wpan_phy, name, type, extended_addr), TP_STRUCT__entry( WPAN_PHY_ENTRY __string(vir_intf_name, name ? name : "<noname>") __field(enum nl802154_iftype, type) __field(__le64, extended_addr) ), TP_fast_assign( WPAN_PHY_ASSIGN; __assign_str(vir_intf_name, name ? name : "<noname>"); __entry->type = type; __entry->extended_addr = extended_addr; ), TP_printk(WPAN_PHY_PR_FMT ", virtual intf name: %s, type: %d, extended addr: 0x%llx", WPAN_PHY_PR_ARG, __get_str(vir_intf_name), __entry->type, __le64_to_cpu(__entry->extended_addr)) ); TRACE_EVENT(802154_rdev_del_virtual_intf, TP_PROTO(struct wpan_phy *wpan_phy, struct wpan_dev *wpan_dev), TP_ARGS(wpan_phy, wpan_dev), TP_STRUCT__entry( WPAN_PHY_ENTRY WPAN_DEV_ENTRY ), TP_fast_assign( WPAN_PHY_ASSIGN; WPAN_DEV_ASSIGN; ), TP_printk(WPAN_PHY_PR_FMT ", " WPAN_DEV_PR_FMT, WPAN_PHY_PR_ARG, WPAN_DEV_PR_ARG) ); TRACE_EVENT(802154_rdev_set_channel, TP_PROTO(struct wpan_phy *wpan_phy, u8 page, u8 channel), TP_ARGS(wpan_phy, page, channel), TP_STRUCT__entry( WPAN_PHY_ENTRY __field(u8, page) __field(u8, channel) ), TP_fast_assign( WPAN_PHY_ASSIGN; __entry->page = page; __entry->channel = channel; ), TP_printk(WPAN_PHY_PR_FMT ", page: %d, channel: %d", WPAN_PHY_PR_ARG, __entry->page, __entry->channel) ); TRACE_EVENT(802154_rdev_set_tx_power, TP_PROTO(struct wpan_phy *wpan_phy, s32 power), TP_ARGS(wpan_phy, power), TP_STRUCT__entry( WPAN_PHY_ENTRY __field(s32, power) ), TP_fast_assign( WPAN_PHY_ASSIGN; __entry->power = power; ), TP_printk(WPAN_PHY_PR_FMT ", mbm: %d", WPAN_PHY_PR_ARG, __entry->power) ); TRACE_EVENT(802154_rdev_set_cca_mode, TP_PROTO(struct wpan_phy *wpan_phy, const struct wpan_phy_cca *cca), TP_ARGS(wpan_phy, cca), TP_STRUCT__entry( WPAN_PHY_ENTRY WPAN_CCA_ENTRY ), TP_fast_assign( WPAN_PHY_ASSIGN; WPAN_CCA_ASSIGN; ), TP_printk(WPAN_PHY_PR_FMT ", " WPAN_CCA_PR_FMT, WPAN_PHY_PR_ARG, WPAN_CCA_PR_ARG) ); TRACE_EVENT(802154_rdev_set_cca_ed_level, TP_PROTO(struct wpan_phy *wpan_phy, s32 ed_level), TP_ARGS(wpan_phy, ed_level), TP_STRUCT__entry( WPAN_PHY_ENTRY __field(s32, ed_level) ), TP_fast_assign( WPAN_PHY_ASSIGN; __entry->ed_level = ed_level; ), TP_printk(WPAN_PHY_PR_FMT ", ed level: %d", WPAN_PHY_PR_ARG, __entry->ed_level) ); DECLARE_EVENT_CLASS(802154_le16_template, TP_PROTO(struct wpan_phy *wpan_phy, struct wpan_dev *wpan_dev, __le16 le16arg), TP_ARGS(wpan_phy, wpan_dev, le16arg), TP_STRUCT__entry( WPAN_PHY_ENTRY WPAN_DEV_ENTRY __field(__le16, le16arg) ), TP_fast_assign( WPAN_PHY_ASSIGN; WPAN_DEV_ASSIGN; __entry->le16arg = le16arg; ), TP_printk(WPAN_PHY_PR_FMT ", " WPAN_DEV_PR_FMT ", pan id: 0x%04x", WPAN_PHY_PR_ARG, WPAN_DEV_PR_ARG, __le16_to_cpu(__entry->le16arg)) ); DEFINE_EVENT(802154_le16_template, 802154_rdev_set_pan_id, TP_PROTO(struct wpan_phy *wpan_phy, struct wpan_dev *wpan_dev, __le16 le16arg), TP_ARGS(wpan_phy, wpan_dev, le16arg) ); DEFINE_EVENT_PRINT(802154_le16_template, 802154_rdev_set_short_addr, TP_PROTO(struct wpan_phy *wpan_phy, struct wpan_dev *wpan_dev, __le16 le16arg), TP_ARGS(wpan_phy, wpan_dev, le16arg), TP_printk(WPAN_PHY_PR_FMT ", " WPAN_DEV_PR_FMT ", short addr: 0x%04x", WPAN_PHY_PR_ARG, WPAN_DEV_PR_ARG, __le16_to_cpu(__entry->le16arg)) ); TRACE_EVENT(802154_rdev_set_backoff_exponent, TP_PROTO(struct wpan_phy *wpan_phy, struct wpan_dev *wpan_dev, u8 min_be, u8 max_be), TP_ARGS(wpan_phy, wpan_dev, min_be, max_be), TP_STRUCT__entry( WPAN_PHY_ENTRY WPAN_DEV_ENTRY __field(u8, min_be) __field(u8, max_be) ), TP_fast_assign( WPAN_PHY_ASSIGN; WPAN_DEV_ASSIGN; __entry->min_be = min_be; __entry->max_be = max_be; ), TP_printk(WPAN_PHY_PR_FMT ", " WPAN_DEV_PR_FMT ", min be: %d, max be: %d", WPAN_PHY_PR_ARG, WPAN_DEV_PR_ARG, __entry->min_be, __entry->max_be) ); TRACE_EVENT(802154_rdev_set_csma_backoffs, TP_PROTO(struct wpan_phy *wpan_phy, struct wpan_dev *wpan_dev, u8 max_csma_backoffs), TP_ARGS(wpan_phy, wpan_dev, max_csma_backoffs), TP_STRUCT__entry( WPAN_PHY_ENTRY WPAN_DEV_ENTRY __field(u8, max_csma_backoffs) ), TP_fast_assign( WPAN_PHY_ASSIGN; WPAN_DEV_ASSIGN; __entry->max_csma_backoffs = max_csma_backoffs; ), TP_printk(WPAN_PHY_PR_FMT ", " WPAN_DEV_PR_FMT ", max csma backoffs: %d", WPAN_PHY_PR_ARG, WPAN_DEV_PR_ARG, __entry->max_csma_backoffs) ); TRACE_EVENT(802154_rdev_set_max_frame_retries, TP_PROTO(struct wpan_phy *wpan_phy, struct wpan_dev *wpan_dev, s8 max_frame_retries), TP_ARGS(wpan_phy, wpan_dev, max_frame_retries), TP_STRUCT__entry( WPAN_PHY_ENTRY WPAN_DEV_ENTRY __field(s8, max_frame_retries) ), TP_fast_assign( WPAN_PHY_ASSIGN; WPAN_DEV_ASSIGN; __entry->max_frame_retries = max_frame_retries; ), TP_printk(WPAN_PHY_PR_FMT ", " WPAN_DEV_PR_FMT ", max frame retries: %d", WPAN_PHY_PR_ARG, WPAN_DEV_PR_ARG, __entry->max_frame_retries) ); TRACE_EVENT(802154_rdev_set_lbt_mode, TP_PROTO(struct wpan_phy *wpan_phy, struct wpan_dev *wpan_dev, bool mode), TP_ARGS(wpan_phy, wpan_dev, mode), TP_STRUCT__entry( WPAN_PHY_ENTRY WPAN_DEV_ENTRY __field(bool, mode) ), TP_fast_assign( WPAN_PHY_ASSIGN; WPAN_DEV_ASSIGN; __entry->mode = mode; ), TP_printk(WPAN_PHY_PR_FMT ", " WPAN_DEV_PR_FMT ", lbt mode: %s", WPAN_PHY_PR_ARG, WPAN_DEV_PR_ARG, BOOL_TO_STR(__entry->mode)) ); TRACE_EVENT(802154_rdev_set_ackreq_default, TP_PROTO(struct wpan_phy *wpan_phy, struct wpan_dev *wpan_dev, bool ackreq), TP_ARGS(wpan_phy, wpan_dev, ackreq), TP_STRUCT__entry( WPAN_PHY_ENTRY WPAN_DEV_ENTRY __field(bool, ackreq) ), TP_fast_assign( WPAN_PHY_ASSIGN; WPAN_DEV_ASSIGN; __entry->ackreq = ackreq; ), TP_printk(WPAN_PHY_PR_FMT ", " WPAN_DEV_PR_FMT ", ackreq default: %s", WPAN_PHY_PR_ARG, WPAN_DEV_PR_ARG, BOOL_TO_STR(__entry->ackreq)) ); TRACE_EVENT(802154_rdev_return_int, TP_PROTO(struct wpan_phy *wpan_phy, int ret), TP_ARGS(wpan_phy, ret), TP_STRUCT__entry( WPAN_PHY_ENTRY __field(int, ret) ), TP_fast_assign( WPAN_PHY_ASSIGN; __entry->ret = ret; ), TP_printk(WPAN_PHY_PR_FMT ", returned: %d", WPAN_PHY_PR_ARG, __entry->ret) ); #endif /* !__RDEV_CFG802154_OPS_TRACE || TRACE_HEADER_MULTI_READ */ #undef TRACE_INCLUDE_PATH #define TRACE_INCLUDE_PATH . #undef TRACE_INCLUDE_FILE #define TRACE_INCLUDE_FILE trace #include <trace/define_trace.h>
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 /* SPDX-License-Identifier: GPL-2.0 */ #undef TRACE_SYSTEM #define TRACE_SYSTEM kmem #if !defined(_TRACE_KMEM_H) || defined(TRACE_HEADER_MULTI_READ) #define _TRACE_KMEM_H #include <linux/types.h> #include <linux/tracepoint.h> #include <trace/events/mmflags.h> DECLARE_EVENT_CLASS(kmem_alloc, TP_PROTO(unsigned long call_site, const void *ptr, size_t bytes_req, size_t bytes_alloc, gfp_t gfp_flags), TP_ARGS(call_site, ptr, bytes_req, bytes_alloc, gfp_flags), TP_STRUCT__entry( __field( unsigned long, call_site ) __field( const void *, ptr ) __field( size_t, bytes_req ) __field( size_t, bytes_alloc ) __field( gfp_t, gfp_flags ) ), TP_fast_assign( __entry->call_site = call_site; __entry->ptr = ptr; __entry->bytes_req = bytes_req; __entry->bytes_alloc = bytes_alloc; __entry->gfp_flags = gfp_flags; ), TP_printk("call_site=%pS ptr=%p bytes_req=%zu bytes_alloc=%zu gfp_flags=%s", (void *)__entry->call_site, __entry->ptr, __entry->bytes_req, __entry->bytes_alloc, show_gfp_flags(__entry->gfp_flags)) ); DEFINE_EVENT(kmem_alloc, kmalloc, TP_PROTO(unsigned long call_site, const void *ptr, size_t bytes_req, size_t bytes_alloc, gfp_t gfp_flags), TP_ARGS(call_site, ptr, bytes_req, bytes_alloc, gfp_flags) ); DEFINE_EVENT(kmem_alloc, kmem_cache_alloc, TP_PROTO(unsigned long call_site, const void *ptr, size_t bytes_req, size_t bytes_alloc, gfp_t gfp_flags), TP_ARGS(call_site, ptr, bytes_req, bytes_alloc, gfp_flags) ); DECLARE_EVENT_CLASS(kmem_alloc_node, TP_PROTO(unsigned long call_site, const void *ptr, size_t bytes_req, size_t bytes_alloc, gfp_t gfp_flags, int node), TP_ARGS(call_site, ptr, bytes_req, bytes_alloc, gfp_flags, node), TP_STRUCT__entry( __field( unsigned long, call_site ) __field( const void *, ptr ) __field( size_t, bytes_req ) __field( size_t, bytes_alloc ) __field( gfp_t, gfp_flags ) __field( int, node ) ), TP_fast_assign( __entry->call_site = call_site; __entry->ptr = ptr; __entry->bytes_req = bytes_req; __entry->bytes_alloc = bytes_alloc; __entry->gfp_flags = gfp_flags; __entry->node = node; ), TP_printk("call_site=%pS ptr=%p bytes_req=%zu bytes_alloc=%zu gfp_flags=%s node=%d", (void *)__entry->call_site, __entry->ptr, __entry->bytes_req, __entry->bytes_alloc, show_gfp_flags(__entry->gfp_flags), __entry->node) ); DEFINE_EVENT(kmem_alloc_node, kmalloc_node, TP_PROTO(unsigned long call_site, const void *ptr, size_t bytes_req, size_t bytes_alloc, gfp_t gfp_flags, int node), TP_ARGS(call_site, ptr, bytes_req, bytes_alloc, gfp_flags, node) ); DEFINE_EVENT(kmem_alloc_node, kmem_cache_alloc_node, TP_PROTO(unsigned long call_site, const void *ptr, size_t bytes_req, size_t bytes_alloc, gfp_t gfp_flags, int node), TP_ARGS(call_site, ptr, bytes_req, bytes_alloc, gfp_flags, node) ); DECLARE_EVENT_CLASS(kmem_free, TP_PROTO(unsigned long call_site, const void *ptr), TP_ARGS(call_site, ptr), TP_STRUCT__entry( __field( unsigned long, call_site ) __field( const void *, ptr ) ), TP_fast_assign( __entry->call_site = call_site; __entry->ptr = ptr; ), TP_printk("call_site=%pS ptr=%p", (void *)__entry->call_site, __entry->ptr) ); DEFINE_EVENT(kmem_free, kfree, TP_PROTO(unsigned long call_site, const void *ptr), TP_ARGS(call_site, ptr) ); DEFINE_EVENT(kmem_free, kmem_cache_free, TP_PROTO(unsigned long call_site, const void *ptr), TP_ARGS(call_site, ptr) ); TRACE_EVENT(mm_page_free, TP_PROTO(struct page *page, unsigned int order), TP_ARGS(page, order), TP_STRUCT__entry( __field( unsigned long, pfn ) __field( unsigned int, order ) ), TP_fast_assign( __entry->pfn = page_to_pfn(page); __entry->order = order; ), TP_printk("page=%p pfn=%lu order=%d", pfn_to_page(__entry->pfn), __entry->pfn, __entry->order) ); TRACE_EVENT(mm_page_free_batched, TP_PROTO(struct page *page), TP_ARGS(page), TP_STRUCT__entry( __field( unsigned long, pfn ) ), TP_fast_assign( __entry->pfn = page_to_pfn(page); ), TP_printk("page=%p pfn=%lu order=0", pfn_to_page(__entry->pfn), __entry->pfn) ); TRACE_EVENT(mm_page_alloc, TP_PROTO(struct page *page, unsigned int order, gfp_t gfp_flags, int migratetype), TP_ARGS(page, order, gfp_flags, migratetype), TP_STRUCT__entry( __field( unsigned long, pfn ) __field( unsigned int, order ) __field( gfp_t, gfp_flags ) __field( int, migratetype ) ), TP_fast_assign( __entry->pfn = page ? page_to_pfn(page) : -1UL; __entry->order = order; __entry->gfp_flags = gfp_flags; __entry->migratetype = migratetype; ), TP_printk("page=%p pfn=%lu order=%d migratetype=%d gfp_flags=%s", __entry->pfn != -1UL ? pfn_to_page(__entry->pfn) : NULL, __entry->pfn != -1UL ? __entry->pfn : 0, __entry->order, __entry->migratetype, show_gfp_flags(__entry->gfp_flags)) ); DECLARE_EVENT_CLASS(mm_page, TP_PROTO(struct page *page, unsigned int order, int migratetype), TP_ARGS(page, order, migratetype), TP_STRUCT__entry( __field( unsigned long, pfn ) __field( unsigned int, order ) __field( int, migratetype ) ), TP_fast_assign( __entry->pfn = page ? page_to_pfn(page) : -1UL; __entry->order = order; __entry->migratetype = migratetype; ), TP_printk("page=%p pfn=%lu order=%u migratetype=%d percpu_refill=%d", __entry->pfn != -1UL ? pfn_to_page(__entry->pfn) : NULL, __entry->pfn != -1UL ? __entry->pfn : 0, __entry->order, __entry->migratetype, __entry->order == 0) ); DEFINE_EVENT(mm_page, mm_page_alloc_zone_locked, TP_PROTO(struct page *page, unsigned int order, int migratetype), TP_ARGS(page, order, migratetype) ); TRACE_EVENT(mm_page_pcpu_drain, TP_PROTO(struct page *page, unsigned int order, int migratetype), TP_ARGS(page, order, migratetype), TP_STRUCT__entry( __field( unsigned long, pfn ) __field( unsigned int, order ) __field( int, migratetype ) ), TP_fast_assign( __entry->pfn = page ? page_to_pfn(page) : -1UL; __entry->order = order; __entry->migratetype = migratetype; ), TP_printk("page=%p pfn=%lu order=%d migratetype=%d", pfn_to_page(__entry->pfn), __entry->pfn, __entry->order, __entry->migratetype) ); TRACE_EVENT(mm_page_alloc_extfrag, TP_PROTO(struct page *page, int alloc_order, int fallback_order, int alloc_migratetype, int fallback_migratetype), TP_ARGS(page, alloc_order, fallback_order, alloc_migratetype, fallback_migratetype), TP_STRUCT__entry( __field( unsigned long, pfn ) __field( int, alloc_order ) __field( int, fallback_order ) __field( int, alloc_migratetype ) __field( int, fallback_migratetype ) __field( int, change_ownership ) ), TP_fast_assign( __entry->pfn = page_to_pfn(page); __entry->alloc_order = alloc_order; __entry->fallback_order = fallback_order; __entry->alloc_migratetype = alloc_migratetype; __entry->fallback_migratetype = fallback_migratetype; __entry->change_ownership = (alloc_migratetype == get_pageblock_migratetype(page)); ), TP_printk("page=%p pfn=%lu alloc_order=%d fallback_order=%d pageblock_order=%d alloc_migratetype=%d fallback_migratetype=%d fragmenting=%d change_ownership=%d", pfn_to_page(__entry->pfn), __entry->pfn, __entry->alloc_order, __entry->fallback_order, pageblock_order, __entry->alloc_migratetype, __entry->fallback_migratetype, __entry->fallback_order < pageblock_order, __entry->change_ownership) ); /* * Required for uniquely and securely identifying mm in rss_stat tracepoint. */ #ifndef __PTR_TO_HASHVAL static unsigned int __maybe_unused mm_ptr_to_hash(const void *ptr) { int ret; unsigned long hashval; ret = ptr_to_hashval(ptr, &hashval); if (ret) return 0; /* The hashed value is only 32-bit */ return (unsigned int)hashval; } #define __PTR_TO_HASHVAL #endif TRACE_EVENT(rss_stat, TP_PROTO(struct mm_struct *mm, int member, long count), TP_ARGS(mm, member, count), TP_STRUCT__entry( __field(unsigned int, mm_id) __field(unsigned int, curr) __field(int, member) __field(long, size) ), TP_fast_assign( __entry->mm_id = mm_ptr_to_hash(mm); __entry->curr = !!(current->mm == mm); __entry->member = member; __entry->size = (count << PAGE_SHIFT); ), TP_printk("mm_id=%u curr=%d member=%d size=%ldB", __entry->mm_id, __entry->curr, __entry->member, __entry->size) ); #endif /* _TRACE_KMEM_H */ /* This part must be outside protection */ #include <trace/define_trace.h>
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 /* SPDX-License-Identifier: GPL-2.0 */ #ifndef _LINUX_TIMERQUEUE_H #define _LINUX_TIMERQUEUE_H #include <linux/rbtree.h> #include <linux/ktime.h> struct timerqueue_node { struct rb_node node; ktime_t expires; }; struct timerqueue_head { struct rb_root_cached rb_root; }; extern bool timerqueue_add(struct timerqueue_head *head, struct timerqueue_node *node); extern bool timerqueue_del(struct timerqueue_head *head, struct timerqueue_node *node); extern struct timerqueue_node *timerqueue_iterate_next( struct timerqueue_node *node); /** * timerqueue_getnext - Returns the timer with the earliest expiration time * * @head: head of timerqueue * * Returns a pointer to the timer node that has the earliest expiration time. */ static inline struct timerqueue_node *timerqueue_getnext(struct timerqueue_head *head) { struct rb_node *leftmost = rb_first_cached(&head->rb_root); return rb_entry(leftmost, struct timerqueue_node, node); } static inline void timerqueue_init(struct timerqueue_node *node) { RB_CLEAR_NODE(&node->node); } static inline bool timerqueue_node_queued(struct timerqueue_node *node) { return !RB_EMPTY_NODE(&node->node); } static inline bool timerqueue_node_expires(struct timerqueue_node *node) { return node->expires; } static inline void timerqueue_init_head(struct timerqueue_head *head) { head->rb_root = RB_ROOT_CACHED; } #endif /* _LINUX_TIMERQUEUE_H */
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 /* SPDX-License-Identifier: GPL-2.0 */ #undef TRACE_SYSTEM #define TRACE_SYSTEM msr #undef TRACE_INCLUDE_FILE #define TRACE_INCLUDE_FILE msr-trace #undef TRACE_INCLUDE_PATH #define TRACE_INCLUDE_PATH asm/ #if !defined(_TRACE_MSR_H) || defined(TRACE_HEADER_MULTI_READ) #define _TRACE_MSR_H #include <linux/tracepoint.h> /* * Tracing for x86 model specific registers. Directly maps to the * RDMSR/WRMSR instructions. */ DECLARE_EVENT_CLASS(msr_trace_class, TP_PROTO(unsigned msr, u64 val, int failed), TP_ARGS(msr, val, failed), TP_STRUCT__entry( __field( unsigned, msr ) __field( u64, val ) __field( int, failed ) ), TP_fast_assign( __entry->msr = msr; __entry->val = val; __entry->failed = failed; ), TP_printk("%x, value %llx%s", __entry->msr, __entry->val, __entry->failed ? " #GP" : "") ); DEFINE_EVENT(msr_trace_class, read_msr, TP_PROTO(unsigned msr, u64 val, int failed), TP_ARGS(msr, val, failed) ); DEFINE_EVENT(msr_trace_class, write_msr, TP_PROTO(unsigned msr, u64 val, int failed), TP_ARGS(msr, val, failed) ); DEFINE_EVENT(msr_trace_class, rdpmc, TP_PROTO(unsigned msr, u64 val, int failed), TP_ARGS(msr, val, failed) ); #endif /* _TRACE_MSR_H */ /* This part must be outside protection */ #include <trace/define_trace.h>
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 /* SPDX-License-Identifier: GPL-2.0 */ /* * Type definitions for the multi-level security (MLS) policy. * * Author : Stephen Smalley, <sds@tycho.nsa.gov> */ /* * Updated: Trusted Computer Solutions, Inc. <dgoeddel@trustedcs.com> * * Support for enhanced MLS infrastructure. * * Copyright (C) 2004-2005 Trusted Computer Solutions, Inc. */ #ifndef _SS_MLS_TYPES_H_ #define _SS_MLS_TYPES_H_ #include "security.h" #include "ebitmap.h" struct mls_level { u32 sens; /* sensitivity */ struct ebitmap cat; /* category set */ }; struct mls_range { struct mls_level level[2]; /* low == level[0], high == level[1] */ }; static inline int mls_level_eq(struct mls_level *l1, struct mls_level *l2) { return ((l1->sens == l2->sens) && ebitmap_cmp(&l1->cat, &l2->cat)); } static inline int mls_level_dom(struct mls_level *l1, struct mls_level *l2) { return ((l1->sens >= l2->sens) && ebitmap_contains(&l1->cat, &l2->cat, 0)); } #define mls_level_incomp(l1, l2) \ (!mls_level_dom((l1), (l2)) && !mls_level_dom((l2), (l1))) #define mls_level_between(l1, l2, l3) \ (mls_level_dom((l1), (l2)) && mls_level_dom((l3), (l1))) #define mls_range_contains(r1, r2) \ (mls_level_dom(&(r2).level[0], &(r1).level[0]) && \ mls_level_dom(&(r1).level[1], &(r2).level[1])) #endif /* _SS_MLS_TYPES_H_ */
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 /* SPDX-License-Identifier: GPL-2.0-or-later */ /* Filesystem parameter description and parser * * Copyright (C) 2018 Red Hat, Inc. All Rights Reserved. * Written by David Howells (dhowells@redhat.com) */ #ifndef _LINUX_FS_PARSER_H #define _LINUX_FS_PARSER_H #include <linux/fs_context.h> struct path; struct constant_table { const char *name; int value; }; struct fs_parameter_spec; struct fs_parse_result; typedef int fs_param_type(struct p_log *, const struct fs_parameter_spec *, struct fs_parameter *, struct fs_parse_result *); /* * The type of parameter expected. */ fs_param_type fs_param_is_bool, fs_param_is_u32, fs_param_is_s32, fs_param_is_u64, fs_param_is_enum, fs_param_is_string, fs_param_is_blob, fs_param_is_blockdev, fs_param_is_path, fs_param_is_fd; /* * Specification of the type of value a parameter wants. * * Note that the fsparam_flag(), fsparam_string(), fsparam_u32(), ... macros * should be used to generate elements of this type. */ struct fs_parameter_spec { const char *name; fs_param_type *type; /* The desired parameter type */ u8 opt; /* Option number (returned by fs_parse()) */ unsigned short flags; #define fs_param_neg_with_no 0x0002 /* "noxxx" is negative param */ #define fs_param_neg_with_empty 0x0004 /* "xxx=" is negative param */ #define fs_param_deprecated 0x0008 /* The param is deprecated */ const void *data; }; /* * Result of parse. */ struct fs_parse_result { bool negated; /* T if param was "noxxx" */ union { bool boolean; /* For spec_bool */ int int_32; /* For spec_s32/spec_enum */ unsigned int uint_32; /* For spec_u32{,_octal,_hex}/spec_enum */ u64 uint_64; /* For spec_u64 */ }; }; extern int __fs_parse(struct p_log *log, const struct fs_parameter_spec *desc, struct fs_parameter *value, struct fs_parse_result *result); static inline int fs_parse(struct fs_context *fc, const struct fs_parameter_spec *desc, struct fs_parameter *param, struct fs_parse_result *result) { return __fs_parse(&fc->log, desc, param, result); } extern int fs_lookup_param(struct fs_context *fc, struct fs_parameter *param, bool want_bdev, struct path *_path); extern int lookup_constant(const struct constant_table tbl[], const char *name, int not_found); #ifdef CONFIG_VALIDATE_FS_PARSER extern bool validate_constant_table(const struct constant_table *tbl, size_t tbl_size, int low, int high, int special); extern bool fs_validate_description(const char *name, const struct fs_parameter_spec *desc); #else static inline bool validate_constant_table(const struct constant_table *tbl, size_t tbl_size, int low, int high, int special) { return true; } static inline bool fs_validate_description(const char *name, const struct fs_parameter_spec *desc) { return true; } #endif /* * Parameter type, name, index and flags element constructors. Use as: * * fsparam_xxxx("foo", Opt_foo) * * If existing helpers are not enough, direct use of __fsparam() would * work, but any such case is probably a sign that new helper is needed. * Helpers will remain stable; low-level implementation may change. */ #define __fsparam(TYPE, NAME, OPT, FLAGS, DATA) \ { \ .name = NAME, \ .opt = OPT, \ .type = TYPE, \ .flags = FLAGS, \ .data = DATA \ } #define fsparam_flag(NAME, OPT) __fsparam(NULL, NAME, OPT, 0, NULL) #define fsparam_flag_no(NAME, OPT) \ __fsparam(NULL, NAME, OPT, fs_param_neg_with_no, NULL) #define fsparam_bool(NAME, OPT) __fsparam(fs_param_is_bool, NAME, OPT, 0, NULL) #define fsparam_u32(NAME, OPT) __fsparam(fs_param_is_u32, NAME, OPT, 0, NULL) #define fsparam_u32oct(NAME, OPT) \ __fsparam(fs_param_is_u32, NAME, OPT, 0, (void *)8) #define fsparam_u32hex(NAME, OPT) \ __fsparam(fs_param_is_u32_hex, NAME, OPT, 0, (void *)16) #define fsparam_s32(NAME, OPT) __fsparam(fs_param_is_s32, NAME, OPT, 0, NULL) #define fsparam_u64(NAME, OPT) __fsparam(fs_param_is_u64, NAME, OPT, 0, NULL) #define fsparam_enum(NAME, OPT, array) __fsparam(fs_param_is_enum, NAME, OPT, 0, array) #define fsparam_string(NAME, OPT) \ __fsparam(fs_param_is_string, NAME, OPT, 0, NULL) #define fsparam_blob(NAME, OPT) __fsparam(fs_param_is_blob, NAME, OPT, 0, NULL) #define fsparam_bdev(NAME, OPT) __fsparam(fs_param_is_blockdev, NAME, OPT, 0, NULL) #define fsparam_path(NAME, OPT) __fsparam(fs_param_is_path, NAME, OPT, 0, NULL) #define fsparam_fd(NAME, OPT) __fsparam(fs_param_is_fd, NAME, OPT, 0, NULL) #endif /* _LINUX_FS_PARSER_H */
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 /* SPDX-License-Identifier: GPL-2.0 */ #ifndef _LINUX_MM_TYPES_H #define _LINUX_MM_TYPES_H #include <linux/mm_types_task.h> #include <linux/auxvec.h> #include <linux/list.h> #include <linux/spinlock.h> #include <linux/rbtree.h> #include <linux/rwsem.h> #include <linux/completion.h> #include <linux/cpumask.h> #include <linux/uprobes.h> #include <linux/page-flags-layout.h> #include <linux/workqueue.h> #include <linux/seqlock.h> #include <asm/mmu.h> #ifndef AT_VECTOR_SIZE_ARCH #define AT_VECTOR_SIZE_ARCH 0 #endif #define AT_VECTOR_SIZE (2*(AT_VECTOR_SIZE_ARCH + AT_VECTOR_SIZE_BASE + 1)) #define INIT_PASID 0 struct address_space; struct mem_cgroup; /* * Each physical page in the system has a struct page associated with * it to keep track of whatever it is we are using the page for at the * moment. Note that we have no way to track which tasks are using * a page, though if it is a pagecache page, rmap structures can tell us * who is mapping it. * * If you allocate the page using alloc_pages(), you can use some of the * space in struct page for your own purposes. The five words in the main * union are available, except for bit 0 of the first word which must be * kept clear. Many users use this word to store a pointer to an object * which is guaranteed to be aligned. If you use the same storage as * page->mapping, you must restore it to NULL before freeing the page. * * If your page will not be mapped to userspace, you can also use the four * bytes in the mapcount union, but you must call page_mapcount_reset() * before freeing it. * * If you want to use the refcount field, it must be used in such a way * that other CPUs temporarily incrementing and then decrementing the * refcount does not cause problems. On receiving the page from * alloc_pages(), the refcount will be positive. * * If you allocate pages of order > 0, you can use some of the fields * in each subpage, but you may need to restore some of their values * afterwards. * * SLUB uses cmpxchg_double() to atomically update its freelist and * counters. That requires that freelist & counters be adjacent and * double-word aligned. We align all struct pages to double-word * boundaries, and ensure that 'freelist' is aligned within the * struct. */ #ifdef CONFIG_HAVE_ALIGNED_STRUCT_PAGE #define _struct_page_alignment __aligned(2 * sizeof(unsigned long)) #else #define _struct_page_alignment #endif struct page { unsigned long flags; /* Atomic flags, some possibly * updated asynchronously */ /* * Five words (20/40 bytes) are available in this union. * WARNING: bit 0 of the first word is used for PageTail(). That * means the other users of this union MUST NOT use the bit to * avoid collision and false-positive PageTail(). */ union { struct { /* Page cache and anonymous pages */ /** * @lru: Pageout list, eg. active_list protected by * pgdat->lru_lock. Sometimes used as a generic list * by the page owner. */ struct list_head lru; /* See page-flags.h for PAGE_MAPPING_FLAGS */ struct address_space *mapping; pgoff_t index; /* Our offset within mapping. */ /** * @private: Mapping-private opaque data. * Usually used for buffer_heads if PagePrivate. * Used for swp_entry_t if PageSwapCache. * Indicates order in the buddy system if PageBuddy. */ unsigned long private; }; struct { /* page_pool used by netstack */ /** * @dma_addr: might require a 64-bit value on * 32-bit architectures. */ unsigned long dma_addr[2]; }; struct { /* slab, slob and slub */ union { struct list_head slab_list; struct { /* Partial pages */ struct page *next; #ifdef CONFIG_64BIT int pages; /* Nr of pages left */ int pobjects; /* Approximate count */ #else short int pages; short int pobjects; #endif }; }; struct kmem_cache *slab_cache; /* not slob */ /* Double-word boundary */ void *freelist; /* first free object */ union { void *s_mem; /* slab: first object */ unsigned long counters; /* SLUB */ struct { /* SLUB */ unsigned inuse:16; unsigned objects:15; unsigned frozen:1; }; }; }; struct { /* Tail pages of compound page */ unsigned long compound_head; /* Bit zero is set */ /* First tail page only */ unsigned char compound_dtor; unsigned char compound_order; atomic_t compound_mapcount; unsigned int compound_nr; /* 1 << compound_order */ }; struct { /* Second tail page of compound page */ unsigned long _compound_pad_1; /* compound_head */ atomic_t hpage_pinned_refcount; /* For both global and memcg */ struct list_head deferred_list; }; struct { /* Page table pages */ unsigned long _pt_pad_1; /* compound_head */ pgtable_t pmd_huge_pte; /* protected by page->ptl */ unsigned long _pt_pad_2; /* mapping */ union { struct mm_struct *pt_mm; /* x86 pgds only */ atomic_t pt_frag_refcount; /* powerpc */ }; #if ALLOC_SPLIT_PTLOCKS spinlock_t *ptl; #else spinlock_t ptl; #endif }; struct { /* ZONE_DEVICE pages */ /** @pgmap: Points to the hosting device page map. */ struct dev_pagemap *pgmap; void *zone_device_data; /* * ZONE_DEVICE private pages are counted as being * mapped so the next 3 words hold the mapping, index, * and private fields from the source anonymous or * page cache page while the page is migrated to device * private memory. * ZONE_DEVICE MEMORY_DEVICE_FS_DAX pages also * use the mapping, index, and private fields when * pmem backed DAX files are mapped. */ }; /** @rcu_head: You can use this to free a page by RCU. */ struct rcu_head rcu_head; }; union { /* This union is 4 bytes in size. */ /* * If the page can be mapped to userspace, encodes the number * of times this page is referenced by a page table. */ atomic_t _mapcount; /* * If the page is neither PageSlab nor mappable to userspace, * the value stored here may help determine what this page * is used for. See page-flags.h for a list of page types * which are currently stored here. */ unsigned int page_type; unsigned int active; /* SLAB */ int units; /* SLOB */ }; /* Usage count. *DO NOT USE DIRECTLY*. See page_ref.h */ atomic_t _refcount; #ifdef CONFIG_MEMCG union { struct mem_cgroup *mem_cgroup; struct obj_cgroup **obj_cgroups; }; #endif /* * On machines where all RAM is mapped into kernel address space, * we can simply calculate the virtual address. On machines with * highmem some memory is mapped into kernel virtual memory * dynamically, so we need a place to store that address. * Note that this field could be 16 bits on x86 ... ;) * * Architectures with slow multiplication can define * WANT_PAGE_VIRTUAL in asm/page.h */ #if defined(WANT_PAGE_VIRTUAL) void *virtual; /* Kernel virtual address (NULL if not kmapped, ie. highmem) */ #endif /* WANT_PAGE_VIRTUAL */ #ifdef LAST_CPUPID_NOT_IN_PAGE_FLAGS int _last_cpupid; #endif } _struct_page_alignment; static inline atomic_t *compound_mapcount_ptr(struct page *page) { return &page[1].compound_mapcount; } static inline atomic_t *compound_pincount_ptr(struct page *page) { return &page[2].hpage_pinned_refcount; } /* * Used for sizing the vmemmap region on some architectures */ #define STRUCT_PAGE_MAX_SHIFT (order_base_2(sizeof(struct page))) #define PAGE_FRAG_CACHE_MAX_SIZE __ALIGN_MASK(32768, ~PAGE_MASK) #define PAGE_FRAG_CACHE_MAX_ORDER get_order(PAGE_FRAG_CACHE_MAX_SIZE) #define page_private(page) ((page)->private) static inline void set_page_private(struct page *page, unsigned long private) { page->private = private; } struct page_frag_cache { void * va; #if (PAGE_SIZE < PAGE_FRAG_CACHE_MAX_SIZE) __u16 offset; __u16 size; #else __u32 offset; #endif /* we maintain a pagecount bias, so that we dont dirty cache line * containing page->_refcount every time we allocate a fragment. */ unsigned int pagecnt_bias; bool pfmemalloc; }; typedef unsigned long vm_flags_t; /* * A region containing a mapping of a non-memory backed file under NOMMU * conditions. These are held in a global tree and are pinned by the VMAs that * map parts of them. */ struct vm_region { struct rb_node vm_rb; /* link in global region tree */ vm_flags_t vm_flags; /* VMA vm_flags */ unsigned long vm_start; /* start address of region */ unsigned long vm_end; /* region initialised to here */ unsigned long vm_top; /* region allocated to here */ unsigned long vm_pgoff; /* the offset in vm_file corresponding to vm_start */ struct file *vm_file; /* the backing file or NULL */ int vm_usage; /* region usage count (access under nommu_region_sem) */ bool vm_icache_flushed : 1; /* true if the icache has been flushed for * this region */ }; #ifdef CONFIG_USERFAULTFD #define NULL_VM_UFFD_CTX ((struct vm_userfaultfd_ctx) { NULL, }) struct vm_userfaultfd_ctx { struct userfaultfd_ctx *ctx; }; #else /* CONFIG_USERFAULTFD */ #define NULL_VM_UFFD_CTX ((struct vm_userfaultfd_ctx) {}) struct vm_userfaultfd_ctx {}; #endif /* CONFIG_USERFAULTFD */ /* * This struct describes a virtual memory area. There is one of these * per VM-area/task. A VM area is any part of the process virtual memory * space that has a special rule for the page-fault handlers (ie a shared * library, the executable area etc). */ struct vm_area_struct { /* The first cache line has the info for VMA tree walking. */ unsigned long vm_start; /* Our start address within vm_mm. */ unsigned long vm_end; /* The first byte after our end address within vm_mm. */ /* linked list of VM areas per task, sorted by address */ struct vm_area_struct *vm_next, *vm_prev; struct rb_node vm_rb; /* * Largest free memory gap in bytes to the left of this VMA. * Either between this VMA and vma->vm_prev, or between one of the * VMAs below us in the VMA rbtree and its ->vm_prev. This helps * get_unmapped_area find a free area of the right size. */ unsigned long rb_subtree_gap; /* Second cache line starts here. */ struct mm_struct *vm_mm; /* The address space we belong to. */ /* * Access permissions of this VMA. * See vmf_insert_mixed_prot() for discussion. */ pgprot_t vm_page_prot; unsigned long vm_flags; /* Flags, see mm.h. */ /* * For areas with an address space and backing store, * linkage into the address_space->i_mmap interval tree. */ struct { struct rb_node rb; unsigned long rb_subtree_last; } shared; /* * A file's MAP_PRIVATE vma can be in both i_mmap tree and anon_vma * list, after a COW of one of the file pages. A MAP_SHARED vma * can only be in the i_mmap tree. An anonymous MAP_PRIVATE, stack * or brk vma (with NULL file) can only be in an anon_vma list. */ struct list_head anon_vma_chain; /* Serialized by mmap_lock & * page_table_lock */ struct anon_vma *anon_vma; /* Serialized by page_table_lock */ /* Function pointers to deal with this struct. */ const struct vm_operations_struct *vm_ops; /* Information about our backing store: */ unsigned long vm_pgoff; /* Offset (within vm_file) in PAGE_SIZE units */ struct file * vm_file; /* File we map to (can be NULL). */ void * vm_private_data; /* was vm_pte (shared mem) */ #ifdef CONFIG_SWAP atomic_long_t swap_readahead_info; #endif #ifndef CONFIG_MMU struct vm_region *vm_region; /* NOMMU mapping region */ #endif #ifdef CONFIG_NUMA struct mempolicy *vm_policy; /* NUMA policy for the VMA */ #endif struct vm_userfaultfd_ctx vm_userfaultfd_ctx; } __randomize_layout; struct core_thread { struct task_struct *task; struct core_thread *next; }; struct core_state { atomic_t nr_threads; struct core_thread dumper; struct completion startup; }; struct kioctx_table; struct mm_struct { struct { struct vm_area_struct *mmap; /* list of VMAs */ struct rb_root mm_rb; u64 vmacache_seqnum; /* per-thread vmacache */ #ifdef CONFIG_MMU unsigned long (*get_unmapped_area) (struct file *filp, unsigned long addr, unsigned long len, unsigned long pgoff, unsigned long flags); #endif unsigned long mmap_base; /* base of mmap area */ unsigned long mmap_legacy_base; /* base of mmap area in bottom-up allocations */ #ifdef CONFIG_HAVE_ARCH_COMPAT_MMAP_BASES /* Base adresses for compatible mmap() */ unsigned long mmap_compat_base; unsigned long mmap_compat_legacy_base; #endif unsigned long task_size; /* size of task vm space */ unsigned long highest_vm_end; /* highest vma end address */ pgd_t * pgd; #ifdef CONFIG_MEMBARRIER /** * @membarrier_state: Flags controlling membarrier behavior. * * This field is close to @pgd to hopefully fit in the same * cache-line, which needs to be touched by switch_mm(). */ atomic_t membarrier_state; #endif /** * @mm_users: The number of users including userspace. * * Use mmget()/mmget_not_zero()/mmput() to modify. When this * drops to 0 (i.e. when the task exits and there are no other * temporary reference holders), we also release a reference on * @mm_count (which may then free the &struct mm_struct if * @mm_count also drops to 0). */ atomic_t mm_users; /** * @mm_count: The number of references to &struct mm_struct * (@mm_users count as 1). * * Use mmgrab()/mmdrop() to modify. When this drops to 0, the * &struct mm_struct is freed. */ atomic_t mm_count; /** * @has_pinned: Whether this mm has pinned any pages. This can * be either replaced in the future by @pinned_vm when it * becomes stable, or grow into a counter on its own. We're * aggresive on this bit now - even if the pinned pages were * unpinned later on, we'll still keep this bit set for the * lifecycle of this mm just for simplicity. */ atomic_t has_pinned; #ifdef CONFIG_MMU atomic_long_t pgtables_bytes; /* PTE page table pages */ #endif int map_count; /* number of VMAs */ spinlock_t page_table_lock; /* Protects page tables and some * counters */ /* * With some kernel config, the current mmap_lock's offset * inside 'mm_struct' is at 0x120, which is very optimal, as * its two hot fields 'count' and 'owner' sit in 2 different * cachelines, and when mmap_lock is highly contended, both * of the 2 fields will be accessed frequently, current layout * will help to reduce cache bouncing. * * So please be careful with adding new fields before * mmap_lock, which can easily push the 2 fields into one * cacheline. */ struct rw_semaphore mmap_lock; struct list_head mmlist; /* List of maybe swapped mm's. These * are globally strung together off * init_mm.mmlist, and are protected * by mmlist_lock */ unsigned long hiwater_rss; /* High-watermark of RSS usage */ unsigned long hiwater_vm; /* High-water virtual memory usage */ unsigned long total_vm; /* Total pages mapped */ unsigned long locked_vm; /* Pages that have PG_mlocked set */ atomic64_t pinned_vm; /* Refcount permanently increased */ unsigned long data_vm; /* VM_WRITE & ~VM_SHARED & ~VM_STACK */ unsigned long exec_vm; /* VM_EXEC & ~VM_WRITE & ~VM_STACK */ unsigned long stack_vm; /* VM_STACK */ unsigned long def_flags; /** * @write_protect_seq: Locked when any thread is write * protecting pages mapped by this mm to enforce a later COW, * for instance during page table copying for fork(). */ seqcount_t write_protect_seq; spinlock_t arg_lock; /* protect the below fields */ unsigned long start_code, end_code, start_data, end_data; unsigned long start_brk, brk, start_stack; unsigned long arg_start, arg_end, env_start, env_end; unsigned long saved_auxv[AT_VECTOR_SIZE]; /* for /proc/PID/auxv */ /* * Special counters, in some configurations protected by the * page_table_lock, in other configurations by being atomic. */ struct mm_rss_stat rss_stat; struct linux_binfmt *binfmt; /* Architecture-specific MM context */ mm_context_t context; unsigned long flags; /* Must use atomic bitops to access */ struct core_state *core_state; /* coredumping support */ #ifdef CONFIG_AIO spinlock_t ioctx_lock; struct kioctx_table __rcu *ioctx_table; #endif #ifdef CONFIG_MEMCG /* * "owner" points to a task that is regarded as the canonical * user/owner of this mm. All of the following must be true in * order for it to be changed: * * current == mm->owner * current->mm != mm * new_owner->mm == mm * new_owner->alloc_lock is held */ struct task_struct __rcu *owner; #endif struct user_namespace *user_ns; /* store ref to file /proc/<pid>/exe symlink points to */ struct file __rcu *exe_file; #ifdef CONFIG_MMU_NOTIFIER struct mmu_notifier_subscriptions *notifier_subscriptions; #endif #if defined(CONFIG_TRANSPARENT_HUGEPAGE) && !USE_SPLIT_PMD_PTLOCKS pgtable_t pmd_huge_pte; /* protected by page_table_lock */ #endif #ifdef CONFIG_NUMA_BALANCING /* * numa_next_scan is the next time that the PTEs will be marked * pte_numa. NUMA hinting faults will gather statistics and * migrate pages to new nodes if necessary. */ unsigned long numa_next_scan; /* Restart point for scanning and setting pte_numa */ unsigned long numa_scan_offset; /* numa_scan_seq prevents two threads setting pte_numa */ int numa_scan_seq; #endif /* * An operation with batched TLB flushing is going on. Anything * that can move process memory needs to flush the TLB when * moving a PROT_NONE or PROT_NUMA mapped page. */ atomic_t tlb_flush_pending; #ifdef CONFIG_ARCH_WANT_BATCHED_UNMAP_TLB_FLUSH /* See flush_tlb_batched_pending() */ bool tlb_flush_batched; #endif struct uprobes_state uprobes_state; #ifdef CONFIG_HUGETLB_PAGE atomic_long_t hugetlb_usage; #endif struct work_struct async_put_work; #ifdef CONFIG_IOMMU_SUPPORT u32 pasid; #endif } __randomize_layout; /* * The mm_cpumask needs to be at the end of mm_struct, because it * is dynamically sized based on nr_cpu_ids. */ unsigned long cpu_bitmap[]; }; extern struct mm_struct init_mm; /* Pointer magic because the dynamic array size confuses some compilers. */ static inline void mm_init_cpumask(struct mm_struct *mm) { unsigned long cpu_bitmap = (unsigned long)mm; cpu_bitmap += offsetof(struct mm_struct, cpu_bitmap); cpumask_clear((struct cpumask *)cpu_bitmap); } /* Future-safe accessor for struct mm_struct's cpu_vm_mask. */ static inline cpumask_t *mm_cpumask(struct mm_struct *mm) { return (struct cpumask *)&mm->cpu_bitmap; } struct mmu_gather; extern void tlb_gather_mmu(struct mmu_gather *tlb, struct mm_struct *mm, unsigned long start, unsigned long end); extern void tlb_finish_mmu(struct mmu_gather *tlb, unsigned long start, unsigned long end); static inline void init_tlb_flush_pending(struct mm_struct *mm) { atomic_set(&mm->tlb_flush_pending, 0); } static inline void inc_tlb_flush_pending(struct mm_struct *mm) { atomic_inc(&mm->tlb_flush_pending); /* * The only time this value is relevant is when there are indeed pages * to flush. And we'll only flush pages after changing them, which * requires the PTL. * * So the ordering here is: * * atomic_inc(&mm->tlb_flush_pending); * spin_lock(&ptl); * ... * set_pte_at(); * spin_unlock(&ptl); * * spin_lock(&ptl) * mm_tlb_flush_pending(); * .... * spin_unlock(&ptl); * * flush_tlb_range(); * atomic_dec(&mm->tlb_flush_pending); * * Where the increment if constrained by the PTL unlock, it thus * ensures that the increment is visible if the PTE modification is * visible. After all, if there is no PTE modification, nobody cares * about TLB flushes either. * * This very much relies on users (mm_tlb_flush_pending() and * mm_tlb_flush_nested()) only caring about _specific_ PTEs (and * therefore specific PTLs), because with SPLIT_PTE_PTLOCKS and RCpc * locks (PPC) the unlock of one doesn't order against the lock of * another PTL. * * The decrement is ordered by the flush_tlb_range(), such that * mm_tlb_flush_pending() will not return false unless all flushes have * completed. */ } static inline void dec_tlb_flush_pending(struct mm_struct *mm) { /* * See inc_tlb_flush_pending(). * * This cannot be smp_mb__before_atomic() because smp_mb() simply does * not order against TLB invalidate completion, which is what we need. * * Therefore we must rely on tlb_flush_*() to guarantee order. */ atomic_dec(&mm->tlb_flush_pending); } static inline bool mm_tlb_flush_pending(struct mm_struct *mm) { /* * Must be called after having acquired the PTL; orders against that * PTLs release and therefore ensures that if we observe the modified * PTE we must also observe the increment from inc_tlb_flush_pending(). * * That is, it only guarantees to return true if there is a flush * pending for _this_ PTL. */ return atomic_read(&mm->tlb_flush_pending); } static inline bool mm_tlb_flush_nested(struct mm_struct *mm) { /* * Similar to mm_tlb_flush_pending(), we must have acquired the PTL * for which there is a TLB flush pending in order to guarantee * we've seen both that PTE modification and the increment. * * (no requirement on actually still holding the PTL, that is irrelevant) */ return atomic_read(&mm->tlb_flush_pending) > 1; } struct vm_fault; /** * typedef vm_fault_t - Return type for page fault handlers. * * Page fault handlers return a bitmask of %VM_FAULT values. */ typedef __bitwise unsigned int vm_fault_t; /** * enum vm_fault_reason - Page fault handlers return a bitmask of * these values to tell the core VM what happened when handling the * fault. Used to decide whether a process gets delivered SIGBUS or * just gets major/minor fault counters bumped up. * * @VM_FAULT_OOM: Out Of Memory * @VM_FAULT_SIGBUS: Bad access * @VM_FAULT_MAJOR: Page read from storage * @VM_FAULT_WRITE: Special case for get_user_pages * @VM_FAULT_HWPOISON: Hit poisoned small page * @VM_FAULT_HWPOISON_LARGE: Hit poisoned large page. Index encoded * in upper bits * @VM_FAULT_SIGSEGV: segmentation fault * @VM_FAULT_NOPAGE: ->fault installed the pte, not return page * @VM_FAULT_LOCKED: ->fault locked the returned page * @VM_FAULT_RETRY: ->fault blocked, must retry * @VM_FAULT_FALLBACK: huge page fault failed, fall back to small * @VM_FAULT_DONE_COW: ->fault has fully handled COW * @VM_FAULT_NEEDDSYNC: ->fault did not modify page tables and needs * fsync() to complete (for synchronous page faults * in DAX) * @VM_FAULT_HINDEX_MASK: mask HINDEX value * */ enum vm_fault_reason { VM_FAULT_OOM = (__force vm_fault_t)0x000001, VM_FAULT_SIGBUS = (__force vm_fault_t)0x000002, VM_FAULT_MAJOR = (__force vm_fault_t)0x000004, VM_FAULT_WRITE = (__force vm_fault_t)0x000008, VM_FAULT_HWPOISON = (__force vm_fault_t)0x000010, VM_FAULT_HWPOISON_LARGE = (__force vm_fault_t)0x000020, VM_FAULT_SIGSEGV = (__force vm_fault_t)0x000040, VM_FAULT_NOPAGE = (__force vm_fault_t)0x000100, VM_FAULT_LOCKED = (__force vm_fault_t)0x000200, VM_FAULT_RETRY = (__force vm_fault_t)0x000400, VM_FAULT_FALLBACK = (__force vm_fault_t)0x000800, VM_FAULT_DONE_COW = (__force vm_fault_t)0x001000, VM_FAULT_NEEDDSYNC = (__force vm_fault_t)0x002000, VM_FAULT_HINDEX_MASK = (__force vm_fault_t)0x0f0000, }; /* Encode hstate index for a hwpoisoned large page */ #define VM_FAULT_SET_HINDEX(x) ((__force vm_fault_t)((x) << 16)) #define VM_FAULT_GET_HINDEX(x) (((__force unsigned int)(x) >> 16) & 0xf) #define VM_FAULT_ERROR (VM_FAULT_OOM | VM_FAULT_SIGBUS | \ VM_FAULT_SIGSEGV | VM_FAULT_HWPOISON | \ VM_FAULT_HWPOISON_LARGE | VM_FAULT_FALLBACK) #define VM_FAULT_RESULT_TRACE \ { VM_FAULT_OOM, "OOM" }, \ { VM_FAULT_SIGBUS, "SIGBUS" }, \ { VM_FAULT_MAJOR, "MAJOR" }, \ { VM_FAULT_WRITE, "WRITE" }, \ { VM_FAULT_HWPOISON, "HWPOISON" }, \ { VM_FAULT_HWPOISON_LARGE, "HWPOISON_LARGE" }, \ { VM_FAULT_SIGSEGV, "SIGSEGV" }, \ { VM_FAULT_NOPAGE, "NOPAGE" }, \ { VM_FAULT_LOCKED, "LOCKED" }, \ { VM_FAULT_RETRY, "RETRY" }, \ { VM_FAULT_FALLBACK, "FALLBACK" }, \ { VM_FAULT_DONE_COW, "DONE_COW" }, \ { VM_FAULT_NEEDDSYNC, "NEEDDSYNC" } struct vm_special_mapping { const char *name; /* The name, e.g. "[vdso]". */ /* * If .fault is not provided, this points to a * NULL-terminated array of pages that back the special mapping. * * This must not be NULL unless .fault is provided. */ struct page **pages; /* * If non-NULL, then this is called to resolve page faults * on the special mapping. If used, .pages is not checked. */ vm_fault_t (*fault)(const struct vm_special_mapping *sm, struct vm_area_struct *vma, struct vm_fault *vmf); int (*mremap)(const struct vm_special_mapping *sm, struct vm_area_struct *new_vma); }; enum tlb_flush_reason { TLB_FLUSH_ON_TASK_SWITCH, TLB_REMOTE_SHOOTDOWN, TLB_LOCAL_SHOOTDOWN, TLB_LOCAL_MM_SHOOTDOWN, TLB_REMOTE_SEND_IPI, NR_TLB_FLUSH_REASONS, }; /* * A swap entry has to fit into a "unsigned long", as the entry is hidden * in the "index" field of the swapper address space. */ typedef struct { unsigned long val; } swp_entry_t; #endif /* _LINUX_MM_TYPES_H */
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 /* SPDX-License-Identifier: GPL-2.0 */ #ifndef _LINUX_PGALLLC_TRACK_H #define _LINUX_PGALLLC_TRACK_H #if defined(CONFIG_MMU) static inline p4d_t *p4d_alloc_track(struct mm_struct *mm, pgd_t *pgd, unsigned long address, pgtbl_mod_mask *mod_mask) { if (unlikely(pgd_none(*pgd))) { if (__p4d_alloc(mm, pgd, address)) return NULL; *mod_mask |= PGTBL_PGD_MODIFIED; } return p4d_offset(pgd, address); } static inline pud_t *pud_alloc_track(struct mm_struct *mm, p4d_t *p4d, unsigned long address, pgtbl_mod_mask *mod_mask) { if (unlikely(p4d_none(*p4d))) { if (__pud_alloc(mm, p4d, address)) return NULL; *mod_mask |= PGTBL_P4D_MODIFIED; } return pud_offset(p4d, address); } static inline pmd_t *pmd_alloc_track(struct mm_struct *mm, pud_t *pud, unsigned long address, pgtbl_mod_mask *mod_mask) { if (unlikely(pud_none(*pud))) { if (__pmd_alloc(mm, pud, address)) return NULL; *mod_mask |= PGTBL_PUD_MODIFIED; } return pmd_offset(pud, address); } #endif /* CONFIG_MMU */ #define pte_alloc_kernel_track(pmd, address, mask) \ ((unlikely(pmd_none(*(pmd))) && \ (__pte_alloc_kernel(pmd) || ({*(mask)|=PGTBL_PMD_MODIFIED;0;})))?\ NULL: pte_offset_kernel(pmd, address)) #endif /* _LINUX_PGALLLC_TRACK_H */
1 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 /* SPDX-License-Identifier: GPL-2.0-or-later */ /* * INET An implementation of the TCP/IP protocol suite for the LINUX * operating system. INET is implemented using the BSD Socket * interface as the means of communication with the user level. * * Definitions for the ICMP module. * * Version: @(#)icmp.h 1.0.4 05/13/93 * * Authors: Ross Biro * Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG> */ #ifndef _ICMP_H #define _ICMP_H #include <linux/icmp.h> #include <net/inet_sock.h> #include <net/snmp.h> #include <net/ip.h> struct icmp_err { int errno; unsigned int fatal:1; }; extern const struct icmp_err icmp_err_convert[]; #define ICMP_INC_STATS(net, field) SNMP_INC_STATS((net)->mib.icmp_statistics, field) #define __ICMP_INC_STATS(net, field) __SNMP_INC_STATS((net)->mib.icmp_statistics, field) #define ICMPMSGOUT_INC_STATS(net, field) SNMP_INC_STATS_ATOMIC_LONG((net)->mib.icmpmsg_statistics, field+256) #define ICMPMSGIN_INC_STATS(net, field) SNMP_INC_STATS_ATOMIC_LONG((net)->mib.icmpmsg_statistics, field) struct dst_entry; struct net_proto_family; struct sk_buff; struct net; void __icmp_send(struct sk_buff *skb_in, int type, int code, __be32 info, const struct ip_options *opt); static inline void icmp_send(struct sk_buff *skb_in, int type, int code, __be32 info) { __icmp_send(skb_in, type, code, info, &IPCB(skb_in)->opt); } #if IS_ENABLED(CONFIG_NF_NAT) void icmp_ndo_send(struct sk_buff *skb_in, int type, int code, __be32 info); #else static inline void icmp_ndo_send(struct sk_buff *skb_in, int type, int code, __be32 info) { struct ip_options opts = { 0 }; __icmp_send(skb_in, type, code, info, &opts); } #endif int icmp_rcv(struct sk_buff *skb); int icmp_err(struct sk_buff *skb, u32 info); int icmp_init(void); void icmp_out_count(struct net *net, unsigned char type); #endif /* _ICMP_H */
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 /* SPDX-License-Identifier: GPL-2.0 */ #ifndef _LINUX_TTY_H #define _LINUX_TTY_H #include <linux/fs.h> #include <linux/major.h> #include <linux/termios.h> #include <linux/workqueue.h> #include <linux/tty_driver.h> #include <linux/tty_ldisc.h> #include <linux/mutex.h> #include <linux/tty_flags.h> #include <linux/seq_file.h> #include <uapi/linux/tty.h> #include <linux/rwsem.h> #include <linux/llist.h> /* * Lock subclasses for tty locks * * TTY_LOCK_NORMAL is for normal ttys and master ptys. * TTY_LOCK_SLAVE is for slave ptys only. * * Lock subclasses are necessary for handling nested locking with pty pairs. * tty locks which use nested locking: * * legacy_mutex - Nested tty locks are necessary for releasing pty pairs. * The stable lock order is master pty first, then slave pty. * termios_rwsem - The stable lock order is tty_buffer lock->termios_rwsem. * Subclassing this lock enables the slave pty to hold its * termios_rwsem when claiming the master tty_buffer lock. * tty_buffer lock - slave ptys can claim nested buffer lock when handling * signal chars. The stable lock order is slave pty, then * master. */ enum { TTY_LOCK_NORMAL = 0, TTY_LOCK_SLAVE, }; /* * (Note: the *_driver.minor_start values 1, 64, 128, 192 are * hardcoded at present.) */ #define NR_UNIX98_PTY_DEFAULT 4096 /* Default maximum for Unix98 ptys */ #define NR_UNIX98_PTY_RESERVE 1024 /* Default reserve for main devpts */ #define NR_UNIX98_PTY_MAX (1 << MINORBITS) /* Absolute limit */ /* * This character is the same as _POSIX_VDISABLE: it cannot be used as * a c_cc[] character, but indicates that a particular special character * isn't in use (eg VINTR has no character etc) */ #define __DISABLED_CHAR '\0' struct tty_buffer { union { struct tty_buffer *next; struct llist_node free; }; int used; int size; int commit; int read; int flags; /* Data points here */ unsigned long data[]; }; /* Values for .flags field of tty_buffer */ #define TTYB_NORMAL 1 /* buffer has no flags buffer */ static inline unsigned char *char_buf_ptr(struct tty_buffer *b, int ofs) { return ((unsigned char *)b->data) + ofs; } static inline char *flag_buf_ptr(struct tty_buffer *b, int ofs) { return (char *)char_buf_ptr(b, ofs) + b->size; } struct tty_bufhead { struct tty_buffer *head; /* Queue head */ struct work_struct work; struct mutex lock; atomic_t priority; struct tty_buffer sentinel; struct llist_head free; /* Free queue head */ atomic_t mem_used; /* In-use buffers excluding free list */ int mem_limit; struct tty_buffer *tail; /* Active buffer */ }; /* * When a break, frame error, or parity error happens, these codes are * stuffed into the flags buffer. */ #define TTY_NORMAL 0 #define TTY_BREAK 1 #define TTY_FRAME 2 #define TTY_PARITY 3 #define TTY_OVERRUN 4 #define INTR_CHAR(tty) ((tty)->termios.c_cc[VINTR]) #define QUIT_CHAR(tty) ((tty)->termios.c_cc[VQUIT]) #define ERASE_CHAR(tty) ((tty)->termios.c_cc[VERASE]) #define KILL_CHAR(tty) ((tty)->termios.c_cc[VKILL]) #define EOF_CHAR(tty) ((tty)->termios.c_cc[VEOF]) #define TIME_CHAR(tty) ((tty)->termios.c_cc[VTIME]) #define MIN_CHAR(tty) ((tty)->termios.c_cc[VMIN]) #define SWTC_CHAR(tty) ((tty)->termios.c_cc[VSWTC]) #define START_CHAR(tty) ((tty)->termios.c_cc[VSTART]) #define STOP_CHAR(tty) ((tty)->termios.c_cc[VSTOP]) #define SUSP_CHAR(tty) ((tty)->termios.c_cc[VSUSP]) #define EOL_CHAR(tty) ((tty)->termios.c_cc[VEOL]) #define REPRINT_CHAR(tty) ((tty)->termios.c_cc[VREPRINT]) #define DISCARD_CHAR(tty) ((tty)->termios.c_cc[VDISCARD]) #define WERASE_CHAR(tty) ((tty)->termios.c_cc[VWERASE]) #define LNEXT_CHAR(tty) ((tty)->termios.c_cc[VLNEXT]) #define EOL2_CHAR(tty) ((tty)->termios.c_cc[VEOL2]) #define _I_FLAG(tty, f) ((tty)->termios.c_iflag & (f)) #define _O_FLAG(tty, f) ((tty)->termios.c_oflag & (f)) #define _C_FLAG(tty, f) ((tty)->termios.c_cflag & (f)) #define _L_FLAG(tty, f) ((tty)->termios.c_lflag & (f)) #define I_IGNBRK(tty) _I_FLAG((tty), IGNBRK) #define I_BRKINT(tty) _I_FLAG((tty), BRKINT) #define I_IGNPAR(tty) _I_FLAG((tty), IGNPAR) #define I_PARMRK(tty) _I_FLAG((tty), PARMRK) #define I_INPCK(tty) _I_FLAG((tty), INPCK) #define I_ISTRIP(tty) _I_FLAG((tty), ISTRIP) #define I_INLCR(tty) _I_FLAG((tty), INLCR) #define I_IGNCR(tty) _I_FLAG((tty), IGNCR) #define I_ICRNL(tty) _I_FLAG((tty), ICRNL) #define I_IUCLC(tty) _I_FLAG((tty), IUCLC) #define I_IXON(tty) _I_FLAG((tty), IXON) #define I_IXANY(tty) _I_FLAG((tty), IXANY) #define I_IXOFF(tty) _I_FLAG((tty), IXOFF) #define I_IMAXBEL(tty) _I_FLAG((tty), IMAXBEL) #define I_IUTF8(tty) _I_FLAG((tty), IUTF8) #define O_OPOST(tty) _O_FLAG((tty), OPOST) #define O_OLCUC(tty) _O_FLAG((tty), OLCUC) #define O_ONLCR(tty) _O_FLAG((tty), ONLCR) #define O_OCRNL(tty) _O_FLAG((tty), OCRNL) #define O_ONOCR(tty) _O_FLAG((tty), ONOCR) #define O_ONLRET(tty) _O_FLAG((tty), ONLRET) #define O_OFILL(tty) _O_FLAG((tty), OFILL) #define O_OFDEL(tty) _O_FLAG((tty), OFDEL) #define O_NLDLY(tty) _O_FLAG((tty), NLDLY) #define O_CRDLY(tty) _O_FLAG((tty), CRDLY) #define O_TABDLY(tty) _O_FLAG((tty), TABDLY) #define O_BSDLY(tty) _O_FLAG((tty), BSDLY) #define O_VTDLY(tty) _O_FLAG((tty), VTDLY) #define O_FFDLY(tty) _O_FLAG((tty), FFDLY) #define C_BAUD(tty) _C_FLAG((tty), CBAUD) #define C_CSIZE(tty) _C_FLAG((tty), CSIZE) #define C_CSTOPB(tty) _C_FLAG((tty), CSTOPB) #define C_CREAD(tty) _C_FLAG((tty), CREAD) #define C_PARENB(tty) _C_FLAG((tty), PARENB) #define C_PARODD(tty) _C_FLAG((tty), PARODD) #define C_HUPCL(tty) _C_FLAG((tty), HUPCL) #define C_CLOCAL(tty) _C_FLAG((tty), CLOCAL) #define C_CIBAUD(tty) _C_FLAG((tty), CIBAUD) #define C_CRTSCTS(tty) _C_FLAG((tty), CRTSCTS) #define C_CMSPAR(tty) _C_FLAG((tty), CMSPAR) #define L_ISIG(tty) _L_FLAG((tty), ISIG) #define L_ICANON(tty) _L_FLAG((tty), ICANON) #define L_XCASE(tty) _L_FLAG((tty), XCASE) #define L_ECHO(tty) _L_FLAG((tty), ECHO) #define L_ECHOE(tty) _L_FLAG((tty), ECHOE) #define L_ECHOK(tty) _L_FLAG((tty), ECHOK) #define L_ECHONL(tty) _L_FLAG((tty), ECHONL) #define L_NOFLSH(tty) _L_FLAG((tty), NOFLSH) #define L_TOSTOP(tty) _L_FLAG((tty), TOSTOP) #define L_ECHOCTL(tty) _L_FLAG((tty), ECHOCTL) #define L_ECHOPRT(tty) _L_FLAG((tty), ECHOPRT) #define L_ECHOKE(tty) _L_FLAG((tty), ECHOKE) #define L_FLUSHO(tty) _L_FLAG((tty), FLUSHO) #define L_PENDIN(tty) _L_FLAG((tty), PENDIN) #define L_IEXTEN(tty) _L_FLAG((tty), IEXTEN) #define L_EXTPROC(tty) _L_FLAG((tty), EXTPROC) struct device; struct signal_struct; /* * Port level information. Each device keeps its own port level information * so provide a common structure for those ports wanting to use common support * routines. * * The tty port has a different lifetime to the tty so must be kept apart. * In addition be careful as tty -> port mappings are valid for the life * of the tty object but in many cases port -> tty mappings are valid only * until a hangup so don't use the wrong path. */ struct tty_port; struct tty_port_operations { /* Return 1 if the carrier is raised */ int (*carrier_raised)(struct tty_port *port); /* Control the DTR line */ void (*dtr_rts)(struct tty_port *port, int raise); /* Called when the last close completes or a hangup finishes IFF the port was initialized. Do not use to free resources. Called under the port mutex to serialize against activate/shutdowns */ void (*shutdown)(struct tty_port *port); /* Called under the port mutex from tty_port_open, serialized using the port mutex */ /* FIXME: long term getting the tty argument *out* of this would be good for consoles */ int (*activate)(struct tty_port *port, struct tty_struct *tty); /* Called on the final put of a port */ void (*destruct)(struct tty_port *port); }; struct tty_port_client_operations { int (*receive_buf)(struct tty_port *port, const unsigned char *, const unsigned char *, size_t); void (*write_wakeup)(struct tty_port *port); }; extern const struct tty_port_client_operations tty_port_default_client_ops; struct tty_port { struct tty_bufhead buf; /* Locked internally */ struct tty_struct *tty; /* Back pointer */ struct tty_struct *itty; /* internal back ptr */ const struct tty_port_operations *ops; /* Port operations */ const struct tty_port_client_operations *client_ops; /* Port client operations */ spinlock_t lock; /* Lock protecting tty field */ int blocked_open; /* Waiting to open */ int count; /* Usage count */ wait_queue_head_t open_wait; /* Open waiters */ wait_queue_head_t delta_msr_wait; /* Modem status change */ unsigned long flags; /* User TTY flags ASYNC_ */ unsigned long iflags; /* Internal flags TTY_PORT_ */ unsigned char console:1, /* port is a console */ low_latency:1; /* optional: tune for latency */ struct mutex mutex; /* Locking */ struct mutex buf_mutex; /* Buffer alloc lock */ unsigned char *xmit_buf; /* Optional buffer */ unsigned int close_delay; /* Close port delay */ unsigned int closing_wait; /* Delay for output */ int drain_delay; /* Set to zero if no pure time based drain is needed else set to size of fifo */ struct kref kref; /* Ref counter */ void *client_data; }; /* tty_port::iflags bits -- use atomic bit ops */ #define TTY_PORT_INITIALIZED 0 /* device is initialized */ #define TTY_PORT_SUSPENDED 1 /* device is suspended */ #define TTY_PORT_ACTIVE 2 /* device is open */ /* * uart drivers: use the uart_port::status field and the UPSTAT_* defines * for s/w-based flow control steering and carrier detection status */ #define TTY_PORT_CTS_FLOW 3 /* h/w flow control enabled */ #define TTY_PORT_CHECK_CD 4 /* carrier detect enabled */ #define TTY_PORT_KOPENED 5 /* device exclusively opened by kernel */ /* * Where all of the state associated with a tty is kept while the tty * is open. Since the termios state should be kept even if the tty * has been closed --- for things like the baud rate, etc --- it is * not stored here, but rather a pointer to the real state is stored * here. Possible the winsize structure should have the same * treatment, but (1) the default 80x24 is usually right and (2) it's * most often used by a windowing system, which will set the correct * size each time the window is created or resized anyway. * - TYT, 9/14/92 */ struct tty_operations; struct tty_struct { int magic; struct kref kref; struct device *dev; struct tty_driver *driver; const struct tty_operations *ops; int index; /* Protects ldisc changes: Lock tty not pty */ struct ld_semaphore ldisc_sem; struct tty_ldisc *ldisc; struct mutex atomic_write_lock; struct mutex legacy_mutex; struct mutex throttle_mutex; struct rw_semaphore termios_rwsem; struct mutex winsize_mutex; spinlock_t ctrl_lock; spinlock_t flow_lock; /* Termios values are protected by the termios rwsem */ struct ktermios termios, termios_locked; char name[64]; struct pid *pgrp; /* Protected by ctrl lock */ /* * Writes protected by both ctrl lock and legacy mutex, readers must use * at least one of them. */ struct pid *session; unsigned long flags; int count; struct winsize winsize; /* winsize_mutex */ unsigned long stopped:1, /* flow_lock */ flow_stopped:1, unused:BITS_PER_LONG - 2; int hw_stopped; unsigned long ctrl_status:8, /* ctrl_lock */ packet:1, unused_ctrl:BITS_PER_LONG - 9; unsigned int receive_room; /* Bytes free for queue */ int flow_change; struct tty_struct *link; struct fasync_struct *fasync; wait_queue_head_t write_wait; wait_queue_head_t read_wait; struct work_struct hangup_work; void *disc_data; void *driver_data; spinlock_t files_lock; /* protects tty_files list */ struct list_head tty_files; #define N_TTY_BUF_SIZE 4096 int closing; unsigned char *write_buf; int write_cnt; /* If the tty has a pending do_SAK, queue it here - akpm */ struct work_struct SAK_work; struct tty_port *port; } __randomize_layout; /* Each of a tty's open files has private_data pointing to tty_file_private */ struct tty_file_private { struct tty_struct *tty; struct file *file; struct list_head list; }; /* tty magic number */ #define TTY_MAGIC 0x5401 /* * These bits are used in the flags field of the tty structure. * * So that interrupts won't be able to mess up the queues, * copy_to_cooked must be atomic with respect to itself, as must * tty->write. Thus, you must use the inline functions set_bit() and * clear_bit() to make things atomic. */ #define TTY_THROTTLED 0 /* Call unthrottle() at threshold min */ #define TTY_IO_ERROR 1 /* Cause an I/O error (may be no ldisc too) */ #define TTY_OTHER_CLOSED 2 /* Other side (if any) has closed */ #define TTY_EXCLUSIVE 3 /* Exclusive open mode */ #define TTY_DO_WRITE_WAKEUP 5 /* Call write_wakeup after queuing new */ #define TTY_LDISC_OPEN 11 /* Line discipline is open */ #define TTY_PTY_LOCK 16 /* pty private */ #define TTY_NO_WRITE_SPLIT 17 /* Preserve write boundaries to driver */ #define TTY_HUPPED 18 /* Post driver->hangup() */ #define TTY_HUPPING 19 /* Hangup in progress */ #define TTY_LDISC_CHANGING 20 /* Change pending - non-block IO */ #define TTY_LDISC_HALTED 22 /* Line discipline is halted */ /* Values for tty->flow_change */ #define TTY_THROTTLE_SAFE 1 #define TTY_UNTHROTTLE_SAFE 2 static inline void __tty_set_flow_change(struct tty_struct *tty, int val) { tty->flow_change = val; } static inline void tty_set_flow_change(struct tty_struct *tty, int val) { tty->flow_change = val; smp_mb(); } static inline bool tty_io_nonblock(struct tty_struct *tty, struct file *file) { return file->f_flags & O_NONBLOCK || test_bit(TTY_LDISC_CHANGING, &tty->flags); } static inline bool tty_io_error(struct tty_struct *tty) { return test_bit(TTY_IO_ERROR, &tty->flags); } static inline bool tty_throttled(struct tty_struct *tty) { return test_bit(TTY_THROTTLED, &tty->flags); } #ifdef CONFIG_TTY extern void tty_kref_put(struct tty_struct *tty); extern struct pid *tty_get_pgrp(struct tty_struct *tty); extern void tty_vhangup_self(void); extern void disassociate_ctty(int priv); extern dev_t tty_devnum(struct tty_struct *tty); extern void proc_clear_tty(struct task_struct *p); extern struct tty_struct *get_current_tty(void); /* tty_io.c */ extern int __init tty_init(void); extern const char *tty_name(const struct tty_struct *tty); extern struct tty_struct *tty_kopen(dev_t device); extern void tty_kclose(struct tty_struct *tty); extern int tty_dev_name_to_number(const char *name, dev_t *number); extern int tty_ldisc_lock(struct tty_struct *tty, unsigned long timeout); extern void tty_ldisc_unlock(struct tty_struct *tty); extern ssize_t redirected_tty_write(struct kiocb *, struct iov_iter *); #else static inline void tty_kref_put(struct tty_struct *tty) { } static inline struct pid *tty_get_pgrp(struct tty_struct *tty) { return NULL; } static inline void tty_vhangup_self(void) { } static inline void disassociate_ctty(int priv) { } static inline dev_t tty_devnum(struct tty_struct *tty) { return 0; } static inline void proc_clear_tty(struct task_struct *p) { } static inline struct tty_struct *get_current_tty(void) { return NULL; } /* tty_io.c */ static inline int __init tty_init(void) { return 0; } static inline const char *tty_name(const struct tty_struct *tty) { return "(none)"; } static inline struct tty_struct *tty_kopen(dev_t device) { return ERR_PTR(-ENODEV); } static inline void tty_kclose(struct tty_struct *tty) { } static inline int tty_dev_name_to_number(const char *name, dev_t *number) { return -ENOTSUPP; } #endif extern struct ktermios tty_std_termios; extern int vcs_init(void); extern struct class *tty_class; /** * tty_kref_get - get a tty reference * @tty: tty device * * Return a new reference to a tty object. The caller must hold * sufficient locks/counts to ensure that their existing reference cannot * go away */ static inline struct tty_struct *tty_kref_get(struct tty_struct *tty) { if (tty) kref_get(&tty->kref); return tty; } extern const char *tty_driver_name(const struct tty_struct *tty); extern void tty_wait_until_sent(struct tty_struct *tty, long timeout); extern int __tty_check_change(struct tty_struct *tty, int sig); extern int tty_check_change(struct tty_struct *tty); extern void __stop_tty(struct tty_struct *tty); extern void stop_tty(struct tty_struct *tty); extern void __start_tty(struct tty_struct *tty); extern void start_tty(struct tty_struct *tty); extern int tty_register_driver(struct tty_driver *driver); extern int tty_unregister_driver(struct tty_driver *driver); extern struct device *tty_register_device(struct tty_driver *driver, unsigned index, struct device *dev); extern struct device *tty_register_device_attr(struct tty_driver *driver, unsigned index, struct device *device, void *drvdata, const struct attribute_group **attr_grp); extern void tty_unregister_device(struct tty_driver *driver, unsigned index); extern void tty_write_message(struct tty_struct *tty, char *msg); extern int tty_send_xchar(struct tty_struct *tty, char ch); extern int tty_put_char(struct tty_struct *tty, unsigned char c); extern int tty_chars_in_buffer(struct tty_struct *tty); extern int tty_write_room(struct tty_struct *tty); extern void tty_driver_flush_buffer(struct tty_struct *tty); extern void tty_throttle(struct tty_struct *tty); extern void tty_unthrottle(struct tty_struct *tty); extern int tty_throttle_safe(struct tty_struct *tty); extern int tty_unthrottle_safe(struct tty_struct *tty); extern int tty_do_resize(struct tty_struct *tty, struct winsize *ws); extern int is_current_pgrp_orphaned(void); extern void tty_hangup(struct tty_struct *tty); extern void tty_vhangup(struct tty_struct *tty); extern void tty_vhangup_session(struct tty_struct *tty); extern int tty_hung_up_p(struct file *filp); extern void do_SAK(struct tty_struct *tty); extern void __do_SAK(struct tty_struct *tty); extern void tty_open_proc_set_tty(struct file *filp, struct tty_struct *tty); extern int tty_signal_session_leader(struct tty_struct *tty, int exit_session); extern void session_clear_tty(struct pid *session); extern void no_tty(void); extern void tty_buffer_free_all(struct tty_port *port); extern void tty_buffer_flush(struct tty_struct *tty, struct tty_ldisc *ld); extern void tty_buffer_init(struct tty_port *port); extern void tty_buffer_set_lock_subclass(struct tty_port *port); extern bool tty_buffer_restart_work(struct tty_port *port); extern bool tty_buffer_cancel_work(struct tty_port *port); extern void tty_buffer_flush_work(struct tty_port *port); extern speed_t tty_termios_baud_rate(struct ktermios *termios); extern speed_t tty_termios_input_baud_rate(struct ktermios *termios); extern void tty_termios_encode_baud_rate(struct ktermios *termios, speed_t ibaud, speed_t obaud); extern void tty_encode_baud_rate(struct tty_struct *tty, speed_t ibaud, speed_t obaud); /** * tty_get_baud_rate - get tty bit rates * @tty: tty to query * * Returns the baud rate as an integer for this terminal. The * termios lock must be held by the caller and the terminal bit * flags may be updated. * * Locking: none */ static inline speed_t tty_get_baud_rate(struct tty_struct *tty) { return tty_termios_baud_rate(&tty->termios); } extern void tty_termios_copy_hw(struct ktermios *new, struct ktermios *old); extern int tty_termios_hw_change(const struct ktermios *a, const struct ktermios *b); extern int tty_set_termios(struct tty_struct *tty, struct ktermios *kt); extern struct tty_ldisc *tty_ldisc_ref(struct tty_struct *); extern void tty_ldisc_deref(struct tty_ldisc *); extern struct tty_ldisc *tty_ldisc_ref_wait(struct tty_struct *); extern void tty_ldisc_hangup(struct tty_struct *tty, bool reset); extern int tty_ldisc_reinit(struct tty_struct *tty, int disc); extern const struct seq_operations tty_ldiscs_seq_ops; extern void tty_wakeup(struct tty_struct *tty); extern void tty_ldisc_flush(struct tty_struct *tty); extern long tty_ioctl(struct file *file, unsigned int cmd, unsigned long arg); extern int tty_mode_ioctl(struct tty_struct *tty, struct file *file, unsigned int cmd, unsigned long arg); extern long tty_jobctrl_ioctl(struct tty_struct *tty, struct tty_struct *real_tty, struct file *file, unsigned int cmd, unsigned long arg); extern int tty_perform_flush(struct tty_struct *tty, unsigned long arg); extern void tty_default_fops(struct file_operations *fops); extern struct tty_struct *alloc_tty_struct(struct tty_driver *driver, int idx); extern int tty_alloc_file(struct file *file); extern void tty_add_file(struct tty_struct *tty, struct file *file); extern void tty_free_file(struct file *file); extern struct tty_struct *tty_init_dev(struct tty_driver *driver, int idx); extern void tty_release_struct(struct tty_struct *tty, int idx); extern int tty_release(struct inode *inode, struct file *filp); extern void tty_init_termios(struct tty_struct *tty); extern void tty_save_termios(struct tty_struct *tty); extern int tty_standard_install(struct tty_driver *driver, struct tty_struct *tty); extern struct mutex tty_mutex; #define tty_is_writelocked(tty) (mutex_is_locked(&tty->atomic_write_lock)) extern void tty_port_init(struct tty_port *port); extern void tty_port_link_device(struct tty_port *port, struct tty_driver *driver, unsigned index); extern struct device *tty_port_register_device(struct tty_port *port, struct tty_driver *driver, unsigned index, struct device *device); extern struct device *tty_port_register_device_attr(struct tty_port *port, struct tty_driver *driver, unsigned index, struct device *device, void *drvdata, const struct attribute_group **attr_grp); extern struct device *tty_port_register_device_serdev(struct tty_port *port, struct tty_driver *driver, unsigned index, struct device *device); extern struct device *tty_port_register_device_attr_serdev(struct tty_port *port, struct tty_driver *driver, unsigned index, struct device *device, void *drvdata, const struct attribute_group **attr_grp); extern void tty_port_unregister_device(struct tty_port *port, struct tty_driver *driver, unsigned index); extern int tty_port_alloc_xmit_buf(struct tty_port *port); extern void tty_port_free_xmit_buf(struct tty_port *port); extern void tty_port_destroy(struct tty_port *port); extern void tty_port_put(struct tty_port *port); static inline struct tty_port *tty_port_get(struct tty_port *port) { if (port && kref_get_unless_zero(&port->kref)) return port; return NULL; } /* If the cts flow control is enabled, return true. */ static inline bool tty_port_cts_enabled(struct tty_port *port) { return test_bit(TTY_PORT_CTS_FLOW, &port->iflags); } static inline void tty_port_set_cts_flow(struct tty_port *port, bool val) { if (val) set_bit(TTY_PORT_CTS_FLOW, &port->iflags); else clear_bit(TTY_PORT_CTS_FLOW, &port->iflags); } static inline bool tty_port_active(struct tty_port *port) { return test_bit(TTY_PORT_ACTIVE, &port->iflags); } static inline void tty_port_set_active(struct tty_port *port, bool val) { if (val) set_bit(TTY_PORT_ACTIVE, &port->iflags); else clear_bit(TTY_PORT_ACTIVE, &port->iflags); } static inline bool tty_port_check_carrier(struct tty_port *port) { return test_bit(TTY_PORT_CHECK_CD, &port->iflags); } static inline void tty_port_set_check_carrier(struct tty_port *port, bool val) { if (val) set_bit(TTY_PORT_CHECK_CD, &port->iflags); else clear_bit(TTY_PORT_CHECK_CD, &port->iflags); } static inline bool tty_port_suspended(struct tty_port *port) { return test_bit(TTY_PORT_SUSPENDED, &port->iflags); } static inline void tty_port_set_suspended(struct tty_port *port, bool val) { if (val) set_bit(TTY_PORT_SUSPENDED, &port->iflags); else clear_bit(TTY_PORT_SUSPENDED, &port->iflags); } static inline bool tty_port_initialized(struct tty_port *port) { return test_bit(TTY_PORT_INITIALIZED, &port->iflags); } static inline void tty_port_set_initialized(struct tty_port *port, bool val) { if (val) set_bit(TTY_PORT_INITIALIZED, &port->iflags); else clear_bit(TTY_PORT_INITIALIZED, &port->iflags); } static inline bool tty_port_kopened(struct tty_port *port) { return test_bit(TTY_PORT_KOPENED, &port->iflags); } static inline void tty_port_set_kopened(struct tty_port *port, bool val) { if (val) set_bit(TTY_PORT_KOPENED, &port->iflags); else clear_bit(TTY_PORT_KOPENED, &port->iflags); } extern struct tty_struct *tty_port_tty_get(struct tty_port *port); extern void tty_port_tty_set(struct tty_port *port, struct tty_struct *tty); extern int tty_port_carrier_raised(struct tty_port *port); extern void tty_port_raise_dtr_rts(struct tty_port *port); extern void tty_port_lower_dtr_rts(struct tty_port *port); extern void tty_port_hangup(struct tty_port *port); extern void tty_port_tty_hangup(struct tty_port *port, bool check_clocal); extern void tty_port_tty_wakeup(struct tty_port *port); extern int tty_port_block_til_ready(struct tty_port *port, struct tty_struct *tty, struct file *filp); extern int tty_port_close_start(struct tty_port *port, struct tty_struct *tty, struct file *filp); extern void tty_port_close_end(struct tty_port *port, struct tty_struct *tty); extern void tty_port_close(struct tty_port *port, struct tty_struct *tty, struct file *filp); extern int tty_port_install(struct tty_port *port, struct tty_driver *driver, struct tty_struct *tty); extern int tty_port_open(struct tty_port *port, struct tty_struct *tty, struct file *filp); static inline int tty_port_users(struct tty_port *port) { return port->count + port->blocked_open; } extern int tty_register_ldisc(int disc, struct tty_ldisc_ops *new_ldisc); extern int tty_unregister_ldisc(int disc); extern int tty_set_ldisc(struct tty_struct *tty, int disc); extern int tty_ldisc_setup(struct tty_struct *tty, struct tty_struct *o_tty); extern void tty_ldisc_release(struct tty_struct *tty); extern int __must_check tty_ldisc_init(struct tty_struct *tty); extern void tty_ldisc_deinit(struct tty_struct *tty); extern int tty_ldisc_receive_buf(struct tty_ldisc *ld, const unsigned char *p, char *f, int count); /* n_tty.c */ extern void n_tty_inherit_ops(struct tty_ldisc_ops *ops); #ifdef CONFIG_TTY extern void __init n_tty_init(void); #else static inline void n_tty_init(void) { } #endif /* tty_audit.c */ #ifdef CONFIG_AUDIT extern void tty_audit_add_data(struct tty_struct *tty, const void *data, size_t size); extern void tty_audit_exit(void); extern void tty_audit_fork(struct signal_struct *sig); extern void tty_audit_tiocsti(struct tty_struct *tty, char ch); extern int tty_audit_push(void); #else static inline void tty_audit_add_data(struct tty_struct *tty, const void *data, size_t size) { } static inline void tty_audit_tiocsti(struct tty_struct *tty, char ch) { } static inline void tty_audit_exit(void) { } static inline void tty_audit_fork(struct signal_struct *sig) { } static inline int tty_audit_push(void) { return 0; } #endif /* tty_ioctl.c */ extern int n_tty_ioctl_helper(struct tty_struct *tty, struct file *file, unsigned int cmd, unsigned long arg); /* vt.c */ extern int vt_ioctl(struct tty_struct *tty, unsigned int cmd, unsigned long arg); extern long vt_compat_ioctl(struct tty_struct *tty, unsigned int cmd, unsigned long arg); /* tty_mutex.c */ /* functions for preparation of BKL removal */ extern void tty_lock(struct tty_struct *tty); extern int tty_lock_interruptible(struct tty_struct *tty); extern void tty_unlock(struct tty_struct *tty); extern void tty_lock_slave(struct tty_struct *tty); extern void tty_unlock_slave(struct tty_struct *tty); extern void tty_set_lock_subclass(struct tty_struct *tty); #ifdef CONFIG_PROC_FS extern void proc_tty_register_driver(struct tty_driver *); extern void proc_tty_unregister_driver(struct tty_driver *); #else static inline void proc_tty_register_driver(struct tty_driver *d) {} static inline void proc_tty_unregister_driver(struct tty_driver *d) {} #endif #define tty_msg(fn, tty, f, ...) \ fn("%s %s: " f, tty_driver_name(tty), tty_name(tty), ##__VA_ARGS__) #define tty_debug(tty, f, ...) tty_msg(pr_debug, tty, f, ##__VA_ARGS__) #define tty_info(tty, f, ...) tty_msg(pr_info, tty, f, ##__VA_ARGS__) #define tty_notice(tty, f, ...) tty_msg(pr_notice, tty, f, ##__VA_ARGS__) #define tty_warn(tty, f, ...) tty_msg(pr_warn, tty, f, ##__VA_ARGS__) #define tty_err(tty, f, ...) tty_msg(pr_err, tty, f, ##__VA_ARGS__) #define tty_info_ratelimited(tty, f, ...) \ tty_msg(pr_info_ratelimited, tty, f, ##__VA_ARGS__) #endif
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 /* SPDX-License-Identifier: GPL-2.0 */ /* * include/linux/signalfd.h * * Copyright (C) 2007 Davide Libenzi <davidel@xmailserver.org> * */ #ifndef _LINUX_SIGNALFD_H #define _LINUX_SIGNALFD_H #include <uapi/linux/signalfd.h> #include <linux/sched/signal.h> #ifdef CONFIG_SIGNALFD /* * Deliver the signal to listening signalfd. */ static inline void signalfd_notify(struct task_struct *tsk, int sig) { if (unlikely(waitqueue_active(&tsk->sighand->signalfd_wqh))) wake_up(&tsk->sighand->signalfd_wqh); } extern void signalfd_cleanup(struct sighand_struct *sighand); #else /* CONFIG_SIGNALFD */ static inline void signalfd_notify(struct task_struct *tsk, int sig) { } static inline void signalfd_cleanup(struct sighand_struct *sighand) { } #endif /* CONFIG_SIGNALFD */ #endif /* _LINUX_SIGNALFD_H */
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 /* SPDX-License-Identifier: GPL-2.0 */ #ifndef _LINUX_NLS_H #define _LINUX_NLS_H #include <linux/init.h> /* Unicode has changed over the years. Unicode code points no longer * fit into 16 bits; as of Unicode 5 valid code points range from 0 * to 0x10ffff (17 planes, where each plane holds 65536 code points). * * The original decision to represent Unicode characters as 16-bit * wchar_t values is now outdated. But plane 0 still includes the * most commonly used characters, so we will retain it. The newer * 32-bit unicode_t type can be used when it is necessary to * represent the full Unicode character set. */ /* Plane-0 Unicode character */ typedef u16 wchar_t; #define MAX_WCHAR_T 0xffff /* Arbitrary Unicode character */ typedef u32 unicode_t; struct nls_table { const char *charset; const char *alias; int (*uni2char) (wchar_t uni, unsigned char *out, int boundlen); int (*char2uni) (const unsigned char *rawstring, int boundlen, wchar_t *uni); const unsigned char *charset2lower; const unsigned char *charset2upper; struct module *owner; struct nls_table *next; }; /* this value hold the maximum octet of charset */ #define NLS_MAX_CHARSET_SIZE 6 /* for UTF-8 */ /* Byte order for UTF-16 strings */ enum utf16_endian { UTF16_HOST_ENDIAN, UTF16_LITTLE_ENDIAN, UTF16_BIG_ENDIAN }; /* nls_base.c */ extern int __register_nls(struct nls_table *, struct module *); extern int unregister_nls(struct nls_table *); extern struct nls_table *load_nls(char *); extern void unload_nls(struct nls_table *); extern struct nls_table *load_nls_default(void); #define register_nls(nls) __register_nls((nls), THIS_MODULE) extern int utf8_to_utf32(const u8 *s, int len, unicode_t *pu); extern int utf32_to_utf8(unicode_t u, u8 *s, int maxlen); extern int utf8s_to_utf16s(const u8 *s, int len, enum utf16_endian endian, wchar_t *pwcs, int maxlen); extern int utf16s_to_utf8s(const wchar_t *pwcs, int len, enum utf16_endian endian, u8 *s, int maxlen); static inline unsigned char nls_tolower(struct nls_table *t, unsigned char c) { unsigned char nc = t->charset2lower[c]; return nc ? nc : c; } static inline unsigned char nls_toupper(struct nls_table *t, unsigned char c) { unsigned char nc = t->charset2upper[c]; return nc ? nc : c; } static inline int nls_strnicmp(struct nls_table *t, const unsigned char *s1, const unsigned char *s2, int len) { while (len--) { if (nls_tolower(t, *s1++) != nls_tolower(t, *s2++)) return 1; } return 0; } /* * nls_nullsize - return length of null character for codepage * @codepage - codepage for which to return length of NULL terminator * * Since we can't guarantee that the null terminator will be a particular * length, we have to check against the codepage. If there's a problem * determining it, assume a single-byte NULL terminator. */ static inline int nls_nullsize(const struct nls_table *codepage) { int charlen; char tmp[NLS_MAX_CHARSET_SIZE]; charlen = codepage->uni2char(0, tmp, NLS_MAX_CHARSET_SIZE); return charlen > 0 ? charlen : 1; } #define MODULE_ALIAS_NLS(name) MODULE_ALIAS("nls_" __stringify(name)) #endif /* _LINUX_NLS_H */
1 1 1 1 1 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946 947 948 949 950 951 952 953 954 955 956 957 958 959 960 961 962 963 964 965 966 967 968 969 970 971 972 973 974 975 976 977 978 979 980 981 982 983 984 985 986 987 988 989 990 991 992 /* SPDX-License-Identifier: GPL-2.0+ */ /* * Read-Copy Update mechanism for mutual exclusion * * Copyright IBM Corporation, 2001 * * Author: Dipankar Sarma <dipankar@in.ibm.com> * * Based on the original work by Paul McKenney <paulmck@vnet.ibm.com> * and inputs from Rusty Russell, Andrea Arcangeli and Andi Kleen. * Papers: * http://www.rdrop.com/users/paulmck/paper/rclockpdcsproof.pdf * http://lse.sourceforge.net/locking/rclock_OLS.2001.05.01c.sc.pdf (OLS2001) * * For detailed explanation of Read-Copy Update mechanism see - * http://lse.sourceforge.net/locking/rcupdate.html * */ #ifndef __LINUX_RCUPDATE_H #define __LINUX_RCUPDATE_H #include <linux/types.h> #include <linux/compiler.h> #include <linux/atomic.h> #include <linux/irqflags.h> #include <linux/preempt.h> #include <linux/bottom_half.h> #include <linux/lockdep.h> #include <asm/processor.h> #include <linux/cpumask.h> #define ULONG_CMP_GE(a, b) (ULONG_MAX / 2 >= (a) - (b)) #define ULONG_CMP_LT(a, b) (ULONG_MAX / 2 < (a) - (b)) #define ulong2long(a) (*(long *)(&(a))) #define USHORT_CMP_GE(a, b) (USHRT_MAX / 2 >= (unsigned short)((a) - (b))) #define USHORT_CMP_LT(a, b) (USHRT_MAX / 2 < (unsigned short)((a) - (b))) /* Exported common interfaces */ void call_rcu(struct rcu_head *head, rcu_callback_t func); void rcu_barrier_tasks(void); void rcu_barrier_tasks_rude(void); void synchronize_rcu(void); #ifdef CONFIG_PREEMPT_RCU void __rcu_read_lock(void); void __rcu_read_unlock(void); /* * Defined as a macro as it is a very low level header included from * areas that don't even know about current. This gives the rcu_read_lock() * nesting depth, but makes sense only if CONFIG_PREEMPT_RCU -- in other * types of kernel builds, the rcu_read_lock() nesting depth is unknowable. */ #define rcu_preempt_depth() (current->rcu_read_lock_nesting) #else /* #ifdef CONFIG_PREEMPT_RCU */ #ifdef CONFIG_TINY_RCU #define rcu_read_unlock_strict() do { } while (0) #else void rcu_read_unlock_strict(void); #endif static inline void __rcu_read_lock(void) { preempt_disable(); } static inline void __rcu_read_unlock(void) { preempt_enable(); rcu_read_unlock_strict(); } static inline int rcu_preempt_depth(void) { return 0; } #endif /* #else #ifdef CONFIG_PREEMPT_RCU */ /* Internal to kernel */ void rcu_init(void); extern int rcu_scheduler_active __read_mostly; void rcu_sched_clock_irq(int user); void rcu_report_dead(unsigned int cpu); void rcutree_migrate_callbacks(int cpu); #ifdef CONFIG_TASKS_RCU_GENERIC void rcu_init_tasks_generic(void); #else static inline void rcu_init_tasks_generic(void) { } #endif #ifdef CONFIG_RCU_STALL_COMMON void rcu_sysrq_start(void); void rcu_sysrq_end(void); #else /* #ifdef CONFIG_RCU_STALL_COMMON */ static inline void rcu_sysrq_start(void) { } static inline void rcu_sysrq_end(void) { } #endif /* #else #ifdef CONFIG_RCU_STALL_COMMON */ #ifdef CONFIG_NO_HZ_FULL void rcu_user_enter(void); void rcu_user_exit(void); #else static inline void rcu_user_enter(void) { } static inline void rcu_user_exit(void) { } #endif /* CONFIG_NO_HZ_FULL */ #ifdef CONFIG_RCU_NOCB_CPU void rcu_init_nohz(void); void rcu_nocb_flush_deferred_wakeup(void); #else /* #ifdef CONFIG_RCU_NOCB_CPU */ static inline void rcu_init_nohz(void) { } static inline void rcu_nocb_flush_deferred_wakeup(void) { } #endif /* #else #ifdef CONFIG_RCU_NOCB_CPU */ /** * RCU_NONIDLE - Indicate idle-loop code that needs RCU readers * @a: Code that RCU needs to pay attention to. * * RCU read-side critical sections are forbidden in the inner idle loop, * that is, between the rcu_idle_enter() and the rcu_idle_exit() -- RCU * will happily ignore any such read-side critical sections. However, * things like powertop need tracepoints in the inner idle loop. * * This macro provides the way out: RCU_NONIDLE(do_something_with_RCU()) * will tell RCU that it needs to pay attention, invoke its argument * (in this example, calling the do_something_with_RCU() function), * and then tell RCU to go back to ignoring this CPU. It is permissible * to nest RCU_NONIDLE() wrappers, but not indefinitely (but the limit is * on the order of a million or so, even on 32-bit systems). It is * not legal to block within RCU_NONIDLE(), nor is it permissible to * transfer control either into or out of RCU_NONIDLE()'s statement. */ #define RCU_NONIDLE(a) \ do { \ rcu_irq_enter_irqson(); \ do { a; } while (0); \ rcu_irq_exit_irqson(); \ } while (0) /* * Note a quasi-voluntary context switch for RCU-tasks's benefit. * This is a macro rather than an inline function to avoid #include hell. */ #ifdef CONFIG_TASKS_RCU_GENERIC # ifdef CONFIG_TASKS_RCU # define rcu_tasks_classic_qs(t, preempt) \ do { \ if (!(preempt) && READ_ONCE((t)->rcu_tasks_holdout)) \ WRITE_ONCE((t)->rcu_tasks_holdout, false); \ } while (0) void call_rcu_tasks(struct rcu_head *head, rcu_callback_t func); void synchronize_rcu_tasks(void); # else # define rcu_tasks_classic_qs(t, preempt) do { } while (0) # define call_rcu_tasks call_rcu # define synchronize_rcu_tasks synchronize_rcu # endif # ifdef CONFIG_TASKS_TRACE_RCU # define rcu_tasks_trace_qs(t) \ do { \ if (!likely(READ_ONCE((t)->trc_reader_checked)) && \ !unlikely(READ_ONCE((t)->trc_reader_nesting))) { \ smp_store_release(&(t)->trc_reader_checked, true); \ smp_mb(); /* Readers partitioned by store. */ \ } \ } while (0) # else # define rcu_tasks_trace_qs(t) do { } while (0) # endif #define rcu_tasks_qs(t, preempt) \ do { \ rcu_tasks_classic_qs((t), (preempt)); \ rcu_tasks_trace_qs((t)); \ } while (0) # ifdef CONFIG_TASKS_RUDE_RCU void call_rcu_tasks_rude(struct rcu_head *head, rcu_callback_t func); void synchronize_rcu_tasks_rude(void); # endif #define rcu_note_voluntary_context_switch(t) rcu_tasks_qs(t, false) void exit_tasks_rcu_start(void); void exit_tasks_rcu_finish(void); #else /* #ifdef CONFIG_TASKS_RCU_GENERIC */ #define rcu_tasks_qs(t, preempt) do { } while (0) #define rcu_note_voluntary_context_switch(t) do { } while (0) #define call_rcu_tasks call_rcu #define synchronize_rcu_tasks synchronize_rcu static inline void exit_tasks_rcu_start(void) { } static inline void exit_tasks_rcu_finish(void) { } #endif /* #else #ifdef CONFIG_TASKS_RCU_GENERIC */ /** * cond_resched_tasks_rcu_qs - Report potential quiescent states to RCU * * This macro resembles cond_resched(), except that it is defined to * report potential quiescent states to RCU-tasks even if the cond_resched() * machinery were to be shut off, as some advocate for PREEMPTION kernels. */ #define cond_resched_tasks_rcu_qs() \ do { \ rcu_tasks_qs(current, false); \ cond_resched(); \ } while (0) /* * Infrastructure to implement the synchronize_() primitives in * TREE_RCU and rcu_barrier_() primitives in TINY_RCU. */ #if defined(CONFIG_TREE_RCU) #include <linux/rcutree.h> #elif defined(CONFIG_TINY_RCU) #include <linux/rcutiny.h> #else #error "Unknown RCU implementation specified to kernel configuration" #endif /* * The init_rcu_head_on_stack() and destroy_rcu_head_on_stack() calls * are needed for dynamic initialization and destruction of rcu_head * on the stack, and init_rcu_head()/destroy_rcu_head() are needed for * dynamic initialization and destruction of statically allocated rcu_head * structures. However, rcu_head structures allocated dynamically in the * heap don't need any initialization. */ #ifdef CONFIG_DEBUG_OBJECTS_RCU_HEAD void init_rcu_head(struct rcu_head *head); void destroy_rcu_head(struct rcu_head *head); void init_rcu_head_on_stack(struct rcu_head *head); void destroy_rcu_head_on_stack(struct rcu_head *head); #else /* !CONFIG_DEBUG_OBJECTS_RCU_HEAD */ static inline void init_rcu_head(struct rcu_head *head) { } static inline void destroy_rcu_head(struct rcu_head *head) { } static inline void init_rcu_head_on_stack(struct rcu_head *head) { } static inline void destroy_rcu_head_on_stack(struct rcu_head *head) { } #endif /* #else !CONFIG_DEBUG_OBJECTS_RCU_HEAD */ #if defined(CONFIG_HOTPLUG_CPU) && defined(CONFIG_PROVE_RCU) bool rcu_lockdep_current_cpu_online(void); #else /* #if defined(CONFIG_HOTPLUG_CPU) && defined(CONFIG_PROVE_RCU) */ static inline bool rcu_lockdep_current_cpu_online(void) { return true; } #endif /* #else #if defined(CONFIG_HOTPLUG_CPU) && defined(CONFIG_PROVE_RCU) */ #ifdef CONFIG_DEBUG_LOCK_ALLOC static inline void rcu_lock_acquire(struct lockdep_map *map) { lock_acquire(map, 0, 0, 2, 0, NULL, _THIS_IP_); } static inline void rcu_lock_release(struct lockdep_map *map) { lock_release(map, _THIS_IP_); } extern struct lockdep_map rcu_lock_map; extern struct lockdep_map rcu_bh_lock_map; extern struct lockdep_map rcu_sched_lock_map; extern struct lockdep_map rcu_callback_map; int debug_lockdep_rcu_enabled(void); int rcu_read_lock_held(void); int rcu_read_lock_bh_held(void); int rcu_read_lock_sched_held(void); int rcu_read_lock_any_held(void); #else /* #ifdef CONFIG_DEBUG_LOCK_ALLOC */ # define rcu_lock_acquire(a) do { } while (0) # define rcu_lock_release(a) do { } while (0) static inline int rcu_read_lock_held(void) { return 1; } static inline int rcu_read_lock_bh_held(void) { return 1; } static inline int rcu_read_lock_sched_held(void) { return !preemptible(); } static inline int rcu_read_lock_any_held(void) { return !preemptible(); } #endif /* #else #ifdef CONFIG_DEBUG_LOCK_ALLOC */ #ifdef CONFIG_PROVE_RCU /** * RCU_LOCKDEP_WARN - emit lockdep splat if specified condition is met * @c: condition to check * @s: informative message */ #define RCU_LOCKDEP_WARN(c, s) \ do { \ static bool __section(".data.unlikely") __warned; \ if ((c) && debug_lockdep_rcu_enabled() && !__warned) { \ __warned = true; \ lockdep_rcu_suspicious(__FILE__, __LINE__, s); \ } \ } while (0) #if defined(CONFIG_PROVE_RCU) && !defined(CONFIG_PREEMPT_RCU) static inline void rcu_preempt_sleep_check(void) { RCU_LOCKDEP_WARN(lock_is_held(&rcu_lock_map), "Illegal context switch in RCU read-side critical section"); } #else /* #ifdef CONFIG_PROVE_RCU */ static inline void rcu_preempt_sleep_check(void) { } #endif /* #else #ifdef CONFIG_PROVE_RCU */ #define rcu_sleep_check() \ do { \ rcu_preempt_sleep_check(); \ RCU_LOCKDEP_WARN(lock_is_held(&rcu_bh_lock_map), \ "Illegal context switch in RCU-bh read-side critical section"); \ RCU_LOCKDEP_WARN(lock_is_held(&rcu_sched_lock_map), \ "Illegal context switch in RCU-sched read-side critical section"); \ } while (0) #else /* #ifdef CONFIG_PROVE_RCU */ #define RCU_LOCKDEP_WARN(c, s) do { } while (0) #define rcu_sleep_check() do { } while (0) #endif /* #else #ifdef CONFIG_PROVE_RCU */ /* * Helper functions for rcu_dereference_check(), rcu_dereference_protected() * and rcu_assign_pointer(). Some of these could be folded into their * callers, but they are left separate in order to ease introduction of * multiple pointers markings to match different RCU implementations * (e.g., __srcu), should this make sense in the future. */ #ifdef __CHECKER__ #define rcu_check_sparse(p, space) \ ((void)(((typeof(*p) space *)p) == p)) #else /* #ifdef __CHECKER__ */ #define rcu_check_sparse(p, space) #endif /* #else #ifdef __CHECKER__ */ #define __rcu_access_pointer(p, space) \ ({ \ typeof(*p) *_________p1 = (typeof(*p) *__force)READ_ONCE(p); \ rcu_check_sparse(p, space); \ ((typeof(*p) __force __kernel *)(_________p1)); \ }) #define __rcu_dereference_check(p, c, space) \ ({ \ /* Dependency order vs. p above. */ \ typeof(*p) *________p1 = (typeof(*p) *__force)READ_ONCE(p); \ RCU_LOCKDEP_WARN(!(c), "suspicious rcu_dereference_check() usage"); \ rcu_check_sparse(p, space); \ ((typeof(*p) __force __kernel *)(________p1)); \ }) #define __rcu_dereference_protected(p, c, space) \ ({ \ RCU_LOCKDEP_WARN(!(c), "suspicious rcu_dereference_protected() usage"); \ rcu_check_sparse(p, space); \ ((typeof(*p) __force __kernel *)(p)); \ }) #define rcu_dereference_raw(p) \ ({ \ /* Dependency order vs. p above. */ \ typeof(p) ________p1 = READ_ONCE(p); \ ((typeof(*p) __force __kernel *)(________p1)); \ }) /** * RCU_INITIALIZER() - statically initialize an RCU-protected global variable * @v: The value to statically initialize with. */ #define RCU_INITIALIZER(v) (typeof(*(v)) __force __rcu *)(v) /** * rcu_assign_pointer() - assign to RCU-protected pointer * @p: pointer to assign to * @v: value to assign (publish) * * Assigns the specified value to the specified RCU-protected * pointer, ensuring that any concurrent RCU readers will see * any prior initialization. * * Inserts memory barriers on architectures that require them * (which is most of them), and also prevents the compiler from * reordering the code that initializes the structure after the pointer * assignment. More importantly, this call documents which pointers * will be dereferenced by RCU read-side code. * * In some special cases, you may use RCU_INIT_POINTER() instead * of rcu_assign_pointer(). RCU_INIT_POINTER() is a bit faster due * to the fact that it does not constrain either the CPU or the compiler. * That said, using RCU_INIT_POINTER() when you should have used * rcu_assign_pointer() is a very bad thing that results in * impossible-to-diagnose memory corruption. So please be careful. * See the RCU_INIT_POINTER() comment header for details. * * Note that rcu_assign_pointer() evaluates each of its arguments only * once, appearances notwithstanding. One of the "extra" evaluations * is in typeof() and the other visible only to sparse (__CHECKER__), * neither of which actually execute the argument. As with most cpp * macros, this execute-arguments-only-once property is important, so * please be careful when making changes to rcu_assign_pointer() and the * other macros that it invokes. */ #define rcu_assign_pointer(p, v) \ do { \ uintptr_t _r_a_p__v = (uintptr_t)(v); \ rcu_check_sparse(p, __rcu); \ \ if (__builtin_constant_p(v) && (_r_a_p__v) == (uintptr_t)NULL) \ WRITE_ONCE((p), (typeof(p))(_r_a_p__v)); \ else \ smp_store_release(&p, RCU_INITIALIZER((typeof(p))_r_a_p__v)); \ } while (0) /** * rcu_replace_pointer() - replace an RCU pointer, returning its old value * @rcu_ptr: RCU pointer, whose old value is returned * @ptr: regular pointer * @c: the lockdep conditions under which the dereference will take place * * Perform a replacement, where @rcu_ptr is an RCU-annotated * pointer and @c is the lockdep argument that is passed to the * rcu_dereference_protected() call used to read that pointer. The old * value of @rcu_ptr is returned, and @rcu_ptr is set to @ptr. */ #define rcu_replace_pointer(rcu_ptr, ptr, c) \ ({ \ typeof(ptr) __tmp = rcu_dereference_protected((rcu_ptr), (c)); \ rcu_assign_pointer((rcu_ptr), (ptr)); \ __tmp; \ }) /** * rcu_access_pointer() - fetch RCU pointer with no dereferencing * @p: The pointer to read * * Return the value of the specified RCU-protected pointer, but omit the * lockdep checks for being in an RCU read-side critical section. This is * useful when the value of this pointer is accessed, but the pointer is * not dereferenced, for example, when testing an RCU-protected pointer * against NULL. Although rcu_access_pointer() may also be used in cases * where update-side locks prevent the value of the pointer from changing, * you should instead use rcu_dereference_protected() for this use case. * * It is also permissible to use rcu_access_pointer() when read-side * access to the pointer was removed at least one grace period ago, as * is the case in the context of the RCU callback that is freeing up * the data, or after a synchronize_rcu() returns. This can be useful * when tearing down multi-linked structures after a grace period * has elapsed. */ #define rcu_access_pointer(p) __rcu_access_pointer((p), __rcu) /** * rcu_dereference_check() - rcu_dereference with debug checking * @p: The pointer to read, prior to dereferencing * @c: The conditions under which the dereference will take place * * Do an rcu_dereference(), but check that the conditions under which the * dereference will take place are correct. Typically the conditions * indicate the various locking conditions that should be held at that * point. The check should return true if the conditions are satisfied. * An implicit check for being in an RCU read-side critical section * (rcu_read_lock()) is included. * * For example: * * bar = rcu_dereference_check(foo->bar, lockdep_is_held(&foo->lock)); * * could be used to indicate to lockdep that foo->bar may only be dereferenced * if either rcu_read_lock() is held, or that the lock required to replace * the bar struct at foo->bar is held. * * Note that the list of conditions may also include indications of when a lock * need not be held, for example during initialisation or destruction of the * target struct: * * bar = rcu_dereference_check(foo->bar, lockdep_is_held(&foo->lock) || * atomic_read(&foo->usage) == 0); * * Inserts memory barriers on architectures that require them * (currently only the Alpha), prevents the compiler from refetching * (and from merging fetches), and, more importantly, documents exactly * which pointers are protected by RCU and checks that the pointer is * annotated as __rcu. */ #define rcu_dereference_check(p, c) \ __rcu_dereference_check((p), (c) || rcu_read_lock_held(), __rcu) /** * rcu_dereference_bh_check() - rcu_dereference_bh with debug checking * @p: The pointer to read, prior to dereferencing * @c: The conditions under which the dereference will take place * * This is the RCU-bh counterpart to rcu_dereference_check(). */ #define rcu_dereference_bh_check(p, c) \ __rcu_dereference_check((p), (c) || rcu_read_lock_bh_held(), __rcu) /** * rcu_dereference_sched_check() - rcu_dereference_sched with debug checking * @p: The pointer to read, prior to dereferencing * @c: The conditions under which the dereference will take place * * This is the RCU-sched counterpart to rcu_dereference_check(). */ #define rcu_dereference_sched_check(p, c) \ __rcu_dereference_check((p), (c) || rcu_read_lock_sched_held(), \ __rcu) /* * The tracing infrastructure traces RCU (we want that), but unfortunately * some of the RCU checks causes tracing to lock up the system. * * The no-tracing version of rcu_dereference_raw() must not call * rcu_read_lock_held(). */ #define rcu_dereference_raw_check(p) __rcu_dereference_check((p), 1, __rcu) /** * rcu_dereference_protected() - fetch RCU pointer when updates prevented * @p: The pointer to read, prior to dereferencing * @c: The conditions under which the dereference will take place * * Return the value of the specified RCU-protected pointer, but omit * the READ_ONCE(). This is useful in cases where update-side locks * prevent the value of the pointer from changing. Please note that this * primitive does *not* prevent the compiler from repeating this reference * or combining it with other references, so it should not be used without * protection of appropriate locks. * * This function is only for update-side use. Using this function * when protected only by rcu_read_lock() will result in infrequent * but very ugly failures. */ #define rcu_dereference_protected(p, c) \ __rcu_dereference_protected((p), (c), __rcu) /** * rcu_dereference() - fetch RCU-protected pointer for dereferencing * @p: The pointer to read, prior to dereferencing * * This is a simple wrapper around rcu_dereference_check(). */ #define rcu_dereference(p) rcu_dereference_check(p, 0) /** * rcu_dereference_bh() - fetch an RCU-bh-protected pointer for dereferencing * @p: The pointer to read, prior to dereferencing * * Makes rcu_dereference_check() do the dirty work. */ #define rcu_dereference_bh(p) rcu_dereference_bh_check(p, 0) /** * rcu_dereference_sched() - fetch RCU-sched-protected pointer for dereferencing * @p: The pointer to read, prior to dereferencing * * Makes rcu_dereference_check() do the dirty work. */ #define rcu_dereference_sched(p) rcu_dereference_sched_check(p, 0) /** * rcu_pointer_handoff() - Hand off a pointer from RCU to other mechanism * @p: The pointer to hand off * * This is simply an identity function, but it documents where a pointer * is handed off from RCU to some other synchronization mechanism, for * example, reference counting or locking. In C11, it would map to * kill_dependency(). It could be used as follows:: * * rcu_read_lock(); * p = rcu_dereference(gp); * long_lived = is_long_lived(p); * if (long_lived) { * if (!atomic_inc_not_zero(p->refcnt)) * long_lived = false; * else * p = rcu_pointer_handoff(p); * } * rcu_read_unlock(); */ #define rcu_pointer_handoff(p) (p) /** * rcu_read_lock() - mark the beginning of an RCU read-side critical section * * When synchronize_rcu() is invoked on one CPU while other CPUs * are within RCU read-side critical sections, then the * synchronize_rcu() is guaranteed to block until after all the other * CPUs exit their critical sections. Similarly, if call_rcu() is invoked * on one CPU while other CPUs are within RCU read-side critical * sections, invocation of the corresponding RCU callback is deferred * until after the all the other CPUs exit their critical sections. * * Note, however, that RCU callbacks are permitted to run concurrently * with new RCU read-side critical sections. One way that this can happen * is via the following sequence of events: (1) CPU 0 enters an RCU * read-side critical section, (2) CPU 1 invokes call_rcu() to register * an RCU callback, (3) CPU 0 exits the RCU read-side critical section, * (4) CPU 2 enters a RCU read-side critical section, (5) the RCU * callback is invoked. This is legal, because the RCU read-side critical * section that was running concurrently with the call_rcu() (and which * therefore might be referencing something that the corresponding RCU * callback would free up) has completed before the corresponding * RCU callback is invoked. * * RCU read-side critical sections may be nested. Any deferred actions * will be deferred until the outermost RCU read-side critical section * completes. * * You can avoid reading and understanding the next paragraph by * following this rule: don't put anything in an rcu_read_lock() RCU * read-side critical section that would block in a !PREEMPTION kernel. * But if you want the full story, read on! * * In non-preemptible RCU implementations (pure TREE_RCU and TINY_RCU), * it is illegal to block while in an RCU read-side critical section. * In preemptible RCU implementations (PREEMPT_RCU) in CONFIG_PREEMPTION * kernel builds, RCU read-side critical sections may be preempted, * but explicit blocking is illegal. Finally, in preemptible RCU * implementations in real-time (with -rt patchset) kernel builds, RCU * read-side critical sections may be preempted and they may also block, but * only when acquiring spinlocks that are subject to priority inheritance. */ static __always_inline void rcu_read_lock(void) { __rcu_read_lock(); __acquire(RCU); rcu_lock_acquire(&rcu_lock_map); RCU_LOCKDEP_WARN(!rcu_is_watching(), "rcu_read_lock() used illegally while idle"); } /* * So where is rcu_write_lock()? It does not exist, as there is no * way for writers to lock out RCU readers. This is a feature, not * a bug -- this property is what provides RCU's performance benefits. * Of course, writers must coordinate with each other. The normal * spinlock primitives work well for this, but any other technique may be * used as well. RCU does not care how the writers keep out of each * others' way, as long as they do so. */ /** * rcu_read_unlock() - marks the end of an RCU read-side critical section. * * In most situations, rcu_read_unlock() is immune from deadlock. * However, in kernels built with CONFIG_RCU_BOOST, rcu_read_unlock() * is responsible for deboosting, which it does via rt_mutex_unlock(). * Unfortunately, this function acquires the scheduler's runqueue and * priority-inheritance spinlocks. This means that deadlock could result * if the caller of rcu_read_unlock() already holds one of these locks or * any lock that is ever acquired while holding them. * * That said, RCU readers are never priority boosted unless they were * preempted. Therefore, one way to avoid deadlock is to make sure * that preemption never happens within any RCU read-side critical * section whose outermost rcu_read_unlock() is called with one of * rt_mutex_unlock()'s locks held. Such preemption can be avoided in * a number of ways, for example, by invoking preempt_disable() before * critical section's outermost rcu_read_lock(). * * Given that the set of locks acquired by rt_mutex_unlock() might change * at any time, a somewhat more future-proofed approach is to make sure * that that preemption never happens within any RCU read-side critical * section whose outermost rcu_read_unlock() is called with irqs disabled. * This approach relies on the fact that rt_mutex_unlock() currently only * acquires irq-disabled locks. * * The second of these two approaches is best in most situations, * however, the first approach can also be useful, at least to those * developers willing to keep abreast of the set of locks acquired by * rt_mutex_unlock(). * * See rcu_read_lock() for more information. */ static inline void rcu_read_unlock(void) { RCU_LOCKDEP_WARN(!rcu_is_watching(), "rcu_read_unlock() used illegally while idle"); __release(RCU); __rcu_read_unlock(); rcu_lock_release(&rcu_lock_map); /* Keep acq info for rls diags. */ } /** * rcu_read_lock_bh() - mark the beginning of an RCU-bh critical section * * This is equivalent of rcu_read_lock(), but also disables softirqs. * Note that anything else that disables softirqs can also serve as * an RCU read-side critical section. * * Note that rcu_read_lock_bh() and the matching rcu_read_unlock_bh() * must occur in the same context, for example, it is illegal to invoke * rcu_read_unlock_bh() from one task if the matching rcu_read_lock_bh() * was invoked from some other task. */ static inline void rcu_read_lock_bh(void) { local_bh_disable(); __acquire(RCU_BH); rcu_lock_acquire(&rcu_bh_lock_map); RCU_LOCKDEP_WARN(!rcu_is_watching(), "rcu_read_lock_bh() used illegally while idle"); } /** * rcu_read_unlock_bh() - marks the end of a softirq-only RCU critical section * * See rcu_read_lock_bh() for more information. */ static inline void rcu_read_unlock_bh(void) { RCU_LOCKDEP_WARN(!rcu_is_watching(), "rcu_read_unlock_bh() used illegally while idle"); rcu_lock_release(&rcu_bh_lock_map); __release(RCU_BH); local_bh_enable(); } /** * rcu_read_lock_sched() - mark the beginning of a RCU-sched critical section * * This is equivalent of rcu_read_lock(), but disables preemption. * Read-side critical sections can also be introduced by anything else * that disables preemption, including local_irq_disable() and friends. * * Note that rcu_read_lock_sched() and the matching rcu_read_unlock_sched() * must occur in the same context, for example, it is illegal to invoke * rcu_read_unlock_sched() from process context if the matching * rcu_read_lock_sched() was invoked from an NMI handler. */ static inline void rcu_read_lock_sched(void) { preempt_disable(); __acquire(RCU_SCHED); rcu_lock_acquire(&rcu_sched_lock_map); RCU_LOCKDEP_WARN(!rcu_is_watching(), "rcu_read_lock_sched() used illegally while idle"); } /* Used by lockdep and tracing: cannot be traced, cannot call lockdep. */ static inline notrace void rcu_read_lock_sched_notrace(void) { preempt_disable_notrace(); __acquire(RCU_SCHED); } /** * rcu_read_unlock_sched() - marks the end of a RCU-classic critical section * * See rcu_read_lock_sched() for more information. */ static inline void rcu_read_unlock_sched(void) { RCU_LOCKDEP_WARN(!rcu_is_watching(), "rcu_read_unlock_sched() used illegally while idle"); rcu_lock_release(&rcu_sched_lock_map); __release(RCU_SCHED); preempt_enable(); } /* Used by lockdep and tracing: cannot be traced, cannot call lockdep. */ static inline notrace void rcu_read_unlock_sched_notrace(void) { __release(RCU_SCHED); preempt_enable_notrace(); } /** * RCU_INIT_POINTER() - initialize an RCU protected pointer * @p: The pointer to be initialized. * @v: The value to initialized the pointer to. * * Initialize an RCU-protected pointer in special cases where readers * do not need ordering constraints on the CPU or the compiler. These * special cases are: * * 1. This use of RCU_INIT_POINTER() is NULLing out the pointer *or* * 2. The caller has taken whatever steps are required to prevent * RCU readers from concurrently accessing this pointer *or* * 3. The referenced data structure has already been exposed to * readers either at compile time or via rcu_assign_pointer() *and* * * a. You have not made *any* reader-visible changes to * this structure since then *or* * b. It is OK for readers accessing this structure from its * new location to see the old state of the structure. (For * example, the changes were to statistical counters or to * other state where exact synchronization is not required.) * * Failure to follow these rules governing use of RCU_INIT_POINTER() will * result in impossible-to-diagnose memory corruption. As in the structures * will look OK in crash dumps, but any concurrent RCU readers might * see pre-initialized values of the referenced data structure. So * please be very careful how you use RCU_INIT_POINTER()!!! * * If you are creating an RCU-protected linked structure that is accessed * by a single external-to-structure RCU-protected pointer, then you may * use RCU_INIT_POINTER() to initialize the internal RCU-protected * pointers, but you must use rcu_assign_pointer() to initialize the * external-to-structure pointer *after* you have completely initialized * the reader-accessible portions of the linked structure. * * Note that unlike rcu_assign_pointer(), RCU_INIT_POINTER() provides no * ordering guarantees for either the CPU or the compiler. */ #define RCU_INIT_POINTER(p, v) \ do { \ rcu_check_sparse(p, __rcu); \ WRITE_ONCE(p, RCU_INITIALIZER(v)); \ } while (0) /** * RCU_POINTER_INITIALIZER() - statically initialize an RCU protected pointer * @p: The pointer to be initialized. * @v: The value to initialized the pointer to. * * GCC-style initialization for an RCU-protected pointer in a structure field. */ #define RCU_POINTER_INITIALIZER(p, v) \ .p = RCU_INITIALIZER(v) /* * Does the specified offset indicate that the corresponding rcu_head * structure can be handled by kvfree_rcu()? */ #define __is_kvfree_rcu_offset(offset) ((offset) < 4096) /* * Helper macro for kfree_rcu() to prevent argument-expansion eyestrain. */ #define __kvfree_rcu(head, offset) \ do { \ BUILD_BUG_ON(!__is_kvfree_rcu_offset(offset)); \ kvfree_call_rcu(head, (rcu_callback_t)(unsigned long)(offset)); \ } while (0) /** * kfree_rcu() - kfree an object after a grace period. * @ptr: pointer to kfree * @rhf: the name of the struct rcu_head within the type of @ptr. * * Many rcu callbacks functions just call kfree() on the base structure. * These functions are trivial, but their size adds up, and furthermore * when they are used in a kernel module, that module must invoke the * high-latency rcu_barrier() function at module-unload time. * * The kfree_rcu() function handles this issue. Rather than encoding a * function address in the embedded rcu_head structure, kfree_rcu() instead * encodes the offset of the rcu_head structure within the base structure. * Because the functions are not allowed in the low-order 4096 bytes of * kernel virtual memory, offsets up to 4095 bytes can be accommodated. * If the offset is larger than 4095 bytes, a compile-time error will * be generated in __kvfree_rcu(). If this error is triggered, you can * either fall back to use of call_rcu() or rearrange the structure to * position the rcu_head structure into the first 4096 bytes. * * Note that the allowable offset might decrease in the future, for example, * to allow something like kmem_cache_free_rcu(). * * The BUILD_BUG_ON check must not involve any function calls, hence the * checks are done in macros here. */ #define kfree_rcu(ptr, rhf) \ do { \ typeof (ptr) ___p = (ptr); \ \ if (___p) \ __kvfree_rcu(&((___p)->rhf), offsetof(typeof(*(ptr)), rhf)); \ } while (0) /** * kvfree_rcu() - kvfree an object after a grace period. * * This macro consists of one or two arguments and it is * based on whether an object is head-less or not. If it * has a head then a semantic stays the same as it used * to be before: * * kvfree_rcu(ptr, rhf); * * where @ptr is a pointer to kvfree(), @rhf is the name * of the rcu_head structure within the type of @ptr. * * When it comes to head-less variant, only one argument * is passed and that is just a pointer which has to be * freed after a grace period. Therefore the semantic is * * kvfree_rcu(ptr); * * where @ptr is a pointer to kvfree(). * * Please note, head-less way of freeing is permitted to * use from a context that has to follow might_sleep() * annotation. Otherwise, please switch and embed the * rcu_head structure within the type of @ptr. */ #define kvfree_rcu(...) KVFREE_GET_MACRO(__VA_ARGS__, \ kvfree_rcu_arg_2, kvfree_rcu_arg_1)(__VA_ARGS__) #define KVFREE_GET_MACRO(_1, _2, NAME, ...) NAME #define kvfree_rcu_arg_2(ptr, rhf) kfree_rcu(ptr, rhf) #define kvfree_rcu_arg_1(ptr) \ do { \ typeof(ptr) ___p = (ptr); \ \ if (___p) \ kvfree_call_rcu(NULL, (rcu_callback_t) (___p)); \ } while (0) /* * Place this after a lock-acquisition primitive to guarantee that * an UNLOCK+LOCK pair acts as a full barrier. This guarantee applies * if the UNLOCK and LOCK are executed by the same CPU or if the * UNLOCK and LOCK operate on the same lock variable. */ #ifdef CONFIG_ARCH_WEAK_RELEASE_ACQUIRE #define smp_mb__after_unlock_lock() smp_mb() /* Full ordering for lock. */ #else /* #ifdef CONFIG_ARCH_WEAK_RELEASE_ACQUIRE */ #define smp_mb__after_unlock_lock() do { } while (0) #endif /* #else #ifdef CONFIG_ARCH_WEAK_RELEASE_ACQUIRE */ /* Has the specified rcu_head structure been handed to call_rcu()? */ /** * rcu_head_init - Initialize rcu_head for rcu_head_after_call_rcu() * @rhp: The rcu_head structure to initialize. * * If you intend to invoke rcu_head_after_call_rcu() to test whether a * given rcu_head structure has already been passed to call_rcu(), then * you must also invoke this rcu_head_init() function on it just after * allocating that structure. Calls to this function must not race with * calls to call_rcu(), rcu_head_after_call_rcu(), or callback invocation. */ static inline void rcu_head_init(struct rcu_head *rhp) { rhp->func = (rcu_callback_t)~0L; } /** * rcu_head_after_call_rcu() - Has this rcu_head been passed to call_rcu()? * @rhp: The rcu_head structure to test. * @f: The function passed to call_rcu() along with @rhp. * * Returns @true if the @rhp has been passed to call_rcu() with @func, * and @false otherwise. Emits a warning in any other case, including * the case where @rhp has already been invoked after a grace period. * Calls to this function must not race with callback invocation. One way * to avoid such races is to enclose the call to rcu_head_after_call_rcu() * in an RCU read-side critical section that includes a read-side fetch * of the pointer to the structure containing @rhp. */ static inline bool rcu_head_after_call_rcu(struct rcu_head *rhp, rcu_callback_t f) { rcu_callback_t func = READ_ONCE(rhp->func); if (func == f) return true; WARN_ON_ONCE(func != (rcu_callback_t)~0L); return false; } /* kernel/ksysfs.c definitions */ extern int rcu_expedited; extern int rcu_normal; #endif /* __LINUX_RCUPDATE_H */
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 /* SPDX-License-Identifier: GPL-2.0-or-later */ /* * Linux NET3: Internet Group Management Protocol [IGMP] * * Authors: * Alan Cox <alan@lxorguk.ukuu.org.uk> * * Extended to talk the BSD extended IGMP protocol of mrouted 3.6 */ #ifndef _LINUX_IGMP_H #define _LINUX_IGMP_H #include <linux/skbuff.h> #include <linux/timer.h> #include <linux/in.h> #include <linux/ip.h> #include <linux/refcount.h> #include <uapi/linux/igmp.h> static inline struct igmphdr *igmp_hdr(const struct sk_buff *skb) { return (struct igmphdr *)skb_transport_header(skb); } static inline struct igmpv3_report * igmpv3_report_hdr(const struct sk_buff *skb) { return (struct igmpv3_report *)skb_transport_header(skb); } static inline struct igmpv3_query * igmpv3_query_hdr(const struct sk_buff *skb) { return (struct igmpv3_query *)skb_transport_header(skb); } struct ip_sf_socklist { unsigned int sl_max; unsigned int sl_count; struct rcu_head rcu; __be32 sl_addr[]; }; #define IP_SFLSIZE(count) (sizeof(struct ip_sf_socklist) + \ (count) * sizeof(__be32)) #define IP_SFBLOCK 10 /* allocate this many at once */ /* ip_mc_socklist is real list now. Speed is not argument; this list never used in fast path code */ struct ip_mc_socklist { struct ip_mc_socklist __rcu *next_rcu; struct ip_mreqn multi; unsigned int sfmode; /* MCAST_{INCLUDE,EXCLUDE} */ struct ip_sf_socklist __rcu *sflist; struct rcu_head rcu; }; struct ip_sf_list { struct ip_sf_list *sf_next; unsigned long sf_count[2]; /* include/exclude counts */ __be32 sf_inaddr; unsigned char sf_gsresp; /* include in g & s response? */ unsigned char sf_oldin; /* change state */ unsigned char sf_crcount; /* retrans. left to send */ }; struct ip_mc_list { struct in_device *interface; __be32 multiaddr; unsigned int sfmode; struct ip_sf_list *sources; struct ip_sf_list *tomb; unsigned long sfcount[2]; union { struct ip_mc_list *next; struct ip_mc_list __rcu *next_rcu; }; struct ip_mc_list __rcu *next_hash; struct timer_list timer; int users; refcount_t refcnt; spinlock_t lock; char tm_running; char reporter; char unsolicit_count; char loaded; unsigned char gsquery; /* check source marks? */ unsigned char crcount; struct rcu_head rcu; }; /* V3 exponential field decoding */ #define IGMPV3_MASK(value, nb) ((nb)>=32 ? (value) : ((1<<(nb))-1) & (value)) #define IGMPV3_EXP(thresh, nbmant, nbexp, value) \ ((value) < (thresh) ? (value) : \ ((IGMPV3_MASK(value, nbmant) | (1<<(nbmant))) << \ (IGMPV3_MASK((value) >> (nbmant), nbexp) + (nbexp)))) #define IGMPV3_QQIC(value) IGMPV3_EXP(0x80, 4, 3, value) #define IGMPV3_MRC(value) IGMPV3_EXP(0x80, 4, 3, value) static inline int ip_mc_may_pull(struct sk_buff *skb, unsigned int len) { if (skb_transport_offset(skb) + ip_transport_len(skb) < len) return 0; return pskb_may_pull(skb, len); } extern int ip_check_mc_rcu(struct in_device *dev, __be32 mc_addr, __be32 src_addr, u8 proto); extern int igmp_rcv(struct sk_buff *); extern int ip_mc_join_group(struct sock *sk, struct ip_mreqn *imr); extern int ip_mc_join_group_ssm(struct sock *sk, struct ip_mreqn *imr, unsigned int mode); extern int ip_mc_leave_group(struct sock *sk, struct ip_mreqn *imr); extern void ip_mc_drop_socket(struct sock *sk); extern int ip_mc_source(int add, int omode, struct sock *sk, struct ip_mreq_source *mreqs, int ifindex); extern int ip_mc_msfilter(struct sock *sk, struct ip_msfilter *msf,int ifindex); extern int ip_mc_msfget(struct sock *sk, struct ip_msfilter *msf, struct ip_msfilter __user *optval, int __user *optlen); extern int ip_mc_gsfget(struct sock *sk, struct group_filter *gsf, struct sockaddr_storage __user *p); extern int ip_mc_sf_allow(struct sock *sk, __be32 local, __be32 rmt, int dif, int sdif); extern void ip_mc_init_dev(struct in_device *); extern void ip_mc_destroy_dev(struct in_device *); extern void ip_mc_up(struct in_device *); extern void ip_mc_down(struct in_device *); extern void ip_mc_unmap(struct in_device *); extern void ip_mc_remap(struct in_device *); extern void __ip_mc_dec_group(struct in_device *in_dev, __be32 addr, gfp_t gfp); static inline void ip_mc_dec_group(struct in_device *in_dev, __be32 addr) { return __ip_mc_dec_group(in_dev, addr, GFP_KERNEL); } extern void __ip_mc_inc_group(struct in_device *in_dev, __be32 addr, gfp_t gfp); extern void ip_mc_inc_group(struct in_device *in_dev, __be32 addr); int ip_mc_check_igmp(struct sk_buff *skb); #endif
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 /* SPDX-License-Identifier: GPL-2.0 */ /* * Internal header to deal with irq_desc->status which will be renamed * to irq_desc->settings. */ enum { _IRQ_DEFAULT_INIT_FLAGS = IRQ_DEFAULT_INIT_FLAGS, _IRQ_PER_CPU = IRQ_PER_CPU, _IRQ_LEVEL = IRQ_LEVEL, _IRQ_NOPROBE = IRQ_NOPROBE, _IRQ_NOREQUEST = IRQ_NOREQUEST, _IRQ_NOTHREAD = IRQ_NOTHREAD, _IRQ_NOAUTOEN = IRQ_NOAUTOEN, _IRQ_MOVE_PCNTXT = IRQ_MOVE_PCNTXT, _IRQ_NO_BALANCING = IRQ_NO_BALANCING, _IRQ_NESTED_THREAD = IRQ_NESTED_THREAD, _IRQ_PER_CPU_DEVID = IRQ_PER_CPU_DEVID, _IRQ_IS_POLLED = IRQ_IS_POLLED, _IRQ_DISABLE_UNLAZY = IRQ_DISABLE_UNLAZY, _IRQ_HIDDEN = IRQ_HIDDEN, _IRQF_MODIFY_MASK = IRQF_MODIFY_MASK, }; #define IRQ_PER_CPU GOT_YOU_MORON #define IRQ_NO_BALANCING GOT_YOU_MORON #define IRQ_LEVEL GOT_YOU_MORON #define IRQ_NOPROBE GOT_YOU_MORON #define IRQ_NOREQUEST GOT_YOU_MORON #define IRQ_NOTHREAD GOT_YOU_MORON #define IRQ_NOAUTOEN GOT_YOU_MORON #define IRQ_NESTED_THREAD GOT_YOU_MORON #define IRQ_PER_CPU_DEVID GOT_YOU_MORON #define IRQ_IS_POLLED GOT_YOU_MORON #define IRQ_DISABLE_UNLAZY GOT_YOU_MORON #define IRQ_HIDDEN GOT_YOU_MORON #undef IRQF_MODIFY_MASK #define IRQF_MODIFY_MASK GOT_YOU_MORON static inline void irq_settings_clr_and_set(struct irq_desc *desc, u32 clr, u32 set) { desc->status_use_accessors &= ~(clr & _IRQF_MODIFY_MASK); desc->status_use_accessors |= (set & _IRQF_MODIFY_MASK); } static inline bool irq_settings_is_per_cpu(struct irq_desc *desc) { return desc->status_use_accessors & _IRQ_PER_CPU; } static inline bool irq_settings_is_per_cpu_devid(struct irq_desc *desc) { return desc->status_use_accessors & _IRQ_PER_CPU_DEVID; } static inline void irq_settings_set_per_cpu(struct irq_desc *desc) { desc->status_use_accessors |= _IRQ_PER_CPU; } static inline void irq_settings_set_no_balancing(struct irq_desc *desc) { desc->status_use_accessors |= _IRQ_NO_BALANCING; } static inline bool irq_settings_has_no_balance_set(struct irq_desc *desc) { return desc->status_use_accessors & _IRQ_NO_BALANCING; } static inline u32 irq_settings_get_trigger_mask(struct irq_desc *desc) { return desc->status_use_accessors & IRQ_TYPE_SENSE_MASK; } static inline void irq_settings_set_trigger_mask(struct irq_desc *desc, u32 mask) { desc->status_use_accessors &= ~IRQ_TYPE_SENSE_MASK; desc->status_use_accessors |= mask & IRQ_TYPE_SENSE_MASK; } static inline bool irq_settings_is_level(struct irq_desc *desc) { return desc->status_use_accessors & _IRQ_LEVEL; } static inline void irq_settings_clr_level(struct irq_desc *desc) { desc->status_use_accessors &= ~_IRQ_LEVEL; } static inline void irq_settings_set_level(struct irq_desc *desc) { desc->status_use_accessors |= _IRQ_LEVEL; } static inline bool irq_settings_can_request(struct irq_desc *desc) { return !(desc->status_use_accessors & _IRQ_NOREQUEST); } static inline void irq_settings_clr_norequest(struct irq_desc *desc) { desc->status_use_accessors &= ~_IRQ_NOREQUEST; } static inline void irq_settings_set_norequest(struct irq_desc *desc) { desc->status_use_accessors |= _IRQ_NOREQUEST; } static inline bool irq_settings_can_thread(struct irq_desc *desc) { return !(desc->status_use_accessors & _IRQ_NOTHREAD); } static inline void irq_settings_clr_nothread(struct irq_desc *desc) { desc->status_use_accessors &= ~_IRQ_NOTHREAD; } static inline void irq_settings_set_nothread(struct irq_desc *desc) { desc->status_use_accessors |= _IRQ_NOTHREAD; } static inline bool irq_settings_can_probe(struct irq_desc *desc) { return !(desc->status_use_accessors & _IRQ_NOPROBE); } static inline void irq_settings_clr_noprobe(struct irq_desc *desc) { desc->status_use_accessors &= ~_IRQ_NOPROBE; } static inline void irq_settings_set_noprobe(struct irq_desc *desc) { desc->status_use_accessors |= _IRQ_NOPROBE; } static inline bool irq_settings_can_move_pcntxt(struct irq_desc *desc) { return desc->status_use_accessors & _IRQ_MOVE_PCNTXT; } static inline bool irq_settings_can_autoenable(struct irq_desc *desc) { return !(desc->status_use_accessors & _IRQ_NOAUTOEN); } static inline bool irq_settings_is_nested_thread(struct irq_desc *desc) { return desc->status_use_accessors & _IRQ_NESTED_THREAD; } static inline bool irq_settings_is_polled(struct irq_desc *desc) { return desc->status_use_accessors & _IRQ_IS_POLLED; } static inline bool irq_settings_disable_unlazy(struct irq_desc *desc) { return desc->status_use_accessors & _IRQ_DISABLE_UNLAZY; } static inline void irq_settings_clr_disable_unlazy(struct irq_desc *desc) { desc->status_use_accessors &= ~_IRQ_DISABLE_UNLAZY; } static inline bool irq_settings_is_hidden(struct irq_desc *desc) { return desc->status_use_accessors & _IRQ_HIDDEN; }
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 /* SPDX-License-Identifier: GPL-2.0 */ #ifndef _INET_ECN_H_ #define _INET_ECN_H_ #include <linux/ip.h> #include <linux/skbuff.h> #include <linux/if_vlan.h> #include <net/inet_sock.h> #include <net/dsfield.h> enum { INET_ECN_NOT_ECT = 0, INET_ECN_ECT_1 = 1, INET_ECN_ECT_0 = 2, INET_ECN_CE = 3, INET_ECN_MASK = 3, }; extern int sysctl_tunnel_ecn_log; static inline int INET_ECN_is_ce(__u8 dsfield) { return (dsfield & INET_ECN_MASK) == INET_ECN_CE; } static inline int INET_ECN_is_not_ect(__u8 dsfield) { return (dsfield & INET_ECN_MASK) == INET_ECN_NOT_ECT; } static inline int INET_ECN_is_capable(__u8 dsfield) { return dsfield & INET_ECN_ECT_0; } /* * RFC 3168 9.1.1 * The full-functionality option for ECN encapsulation is to copy the * ECN codepoint of the inside header to the outside header on * encapsulation if the inside header is not-ECT or ECT, and to set the * ECN codepoint of the outside header to ECT(0) if the ECN codepoint of * the inside header is CE. */ static inline __u8 INET_ECN_encapsulate(__u8 outer, __u8 inner) { outer &= ~INET_ECN_MASK; outer |= !INET_ECN_is_ce(inner) ? (inner & INET_ECN_MASK) : INET_ECN_ECT_0; return outer; } static inline void INET_ECN_xmit(struct sock *sk) { inet_sk(sk)->tos |= INET_ECN_ECT_0; if (inet6_sk(sk) != NULL) inet6_sk(sk)->tclass |= INET_ECN_ECT_0; } static inline void INET_ECN_dontxmit(struct sock *sk) { inet_sk(sk)->tos &= ~INET_ECN_MASK; if (inet6_sk(sk) != NULL) inet6_sk(sk)->tclass &= ~INET_ECN_MASK; } #define IP6_ECN_flow_init(label) do { \ (label) &= ~htonl(INET_ECN_MASK << 20); \ } while (0) #define IP6_ECN_flow_xmit(sk, label) do { \ if (INET_ECN_is_capable(inet6_sk(sk)->tclass)) \ (label) |= htonl(INET_ECN_ECT_0 << 20); \ } while (0) static inline int IP_ECN_set_ce(struct iphdr *iph) { u32 check = (__force u32)iph->check; u32 ecn = (iph->tos + 1) & INET_ECN_MASK; /* * After the last operation we have (in binary): * INET_ECN_NOT_ECT => 01 * INET_ECN_ECT_1 => 10 * INET_ECN_ECT_0 => 11 * INET_ECN_CE => 00 */ if (!(ecn & 2)) return !ecn; /* * The following gives us: * INET_ECN_ECT_1 => check += htons(0xFFFD) * INET_ECN_ECT_0 => check += htons(0xFFFE) */ check += (__force u16)htons(0xFFFB) + (__force u16)htons(ecn); iph->check = (__force __sum16)(check + (check>=0xFFFF)); iph->tos |= INET_ECN_CE; return 1; } static inline int IP_ECN_set_ect1(struct iphdr *iph) { u32 check = (__force u32)iph->check; if ((iph->tos & INET_ECN_MASK) != INET_ECN_ECT_0) return 0; check += (__force u16)htons(0x1); iph->check = (__force __sum16)(check + (check>=0xFFFF)); iph->tos ^= INET_ECN_MASK; return 1; } static inline void IP_ECN_clear(struct iphdr *iph) { iph->tos &= ~INET_ECN_MASK; } static inline void ipv4_copy_dscp(unsigned int dscp, struct iphdr *inner) { dscp &= ~INET_ECN_MASK; ipv4_change_dsfield(inner, INET_ECN_MASK, dscp); } struct ipv6hdr; /* Note: * IP_ECN_set_ce() has to tweak IPV4 checksum when setting CE, * meaning both changes have no effect on skb->csum if/when CHECKSUM_COMPLETE * In IPv6 case, no checksum compensates the change in IPv6 header, * so we have to update skb->csum. */ static inline int IP6_ECN_set_ce(struct sk_buff *skb, struct ipv6hdr *iph) { __be32 from, to; if (INET_ECN_is_not_ect(ipv6_get_dsfield(iph))) return 0; from = *(__be32 *)iph; to = from | htonl(INET_ECN_CE << 20); *(__be32 *)iph = to; if (skb->ip_summed == CHECKSUM_COMPLETE) skb->csum = csum_add(csum_sub(skb->csum, (__force __wsum)from), (__force __wsum)to); return 1; } static inline int IP6_ECN_set_ect1(struct sk_buff *skb, struct ipv6hdr *iph) { __be32 from, to; if ((ipv6_get_dsfield(iph) & INET_ECN_MASK) != INET_ECN_ECT_0) return 0; from = *(__be32 *)iph; to = from ^ htonl(INET_ECN_MASK << 20); *(__be32 *)iph = to; if (skb->ip_summed == CHECKSUM_COMPLETE) skb->csum = csum_add(csum_sub(skb->csum, (__force __wsum)from), (__force __wsum)to); return 1; } static inline void ipv6_copy_dscp(unsigned int dscp, struct ipv6hdr *inner) { dscp &= ~INET_ECN_MASK; ipv6_change_dsfield(inner, INET_ECN_MASK, dscp); } static inline int INET_ECN_set_ce(struct sk_buff *skb) { switch (skb_protocol(skb, true)) { case cpu_to_be16(ETH_P_IP): if (skb_network_header(skb) + sizeof(struct iphdr) <= skb_tail_pointer(skb)) return IP_ECN_set_ce(ip_hdr(skb)); break; case cpu_to_be16(ETH_P_IPV6): if (skb_network_header(skb) + sizeof(struct ipv6hdr) <= skb_tail_pointer(skb)) return IP6_ECN_set_ce(skb, ipv6_hdr(skb)); break; } return 0; } static inline int INET_ECN_set_ect1(struct sk_buff *skb) { switch (skb_protocol(skb, true)) { case cpu_to_be16(ETH_P_IP): if (skb_network_header(skb) + sizeof(struct iphdr) <= skb_tail_pointer(skb)) return IP_ECN_set_ect1(ip_hdr(skb)); break; case cpu_to_be16(ETH_P_IPV6): if (skb_network_header(skb) + sizeof(struct ipv6hdr) <= skb_tail_pointer(skb)) return IP6_ECN_set_ect1(skb, ipv6_hdr(skb)); break; } return 0; } /* * RFC 6040 4.2 * To decapsulate the inner header at the tunnel egress, a compliant * tunnel egress MUST set the outgoing ECN field to the codepoint at the * intersection of the appropriate arriving inner header (row) and outer * header (column) in Figure 4 * * +---------+------------------------------------------------+ * |Arriving | Arriving Outer Header | * | Inner +---------+------------+------------+------------+ * | Header | Not-ECT | ECT(0) | ECT(1) | CE | * +---------+---------+------------+------------+------------+ * | Not-ECT | Not-ECT |Not-ECT(!!!)|Not-ECT(!!!)| <drop>(!!!)| * | ECT(0) | ECT(0) | ECT(0) | ECT(1) | CE | * | ECT(1) | ECT(1) | ECT(1) (!) | ECT(1) | CE | * | CE | CE | CE | CE(!!!)| CE | * +---------+---------+------------+------------+------------+ * * Figure 4: New IP in IP Decapsulation Behaviour * * returns 0 on success * 1 if something is broken and should be logged (!!! above) * 2 if packet should be dropped */ static inline int __INET_ECN_decapsulate(__u8 outer, __u8 inner, bool *set_ce) { if (INET_ECN_is_not_ect(inner)) { switch (outer & INET_ECN_MASK) { case INET_ECN_NOT_ECT: return 0; case INET_ECN_ECT_0: case INET_ECN_ECT_1: return 1; case INET_ECN_CE: return 2; } } *set_ce = INET_ECN_is_ce(outer); return 0; } static inline int INET_ECN_decapsulate(struct sk_buff *skb, __u8 outer, __u8 inner) { bool set_ce = false; int rc; rc = __INET_ECN_decapsulate(outer, inner, &set_ce); if (!rc) { if (set_ce) INET_ECN_set_ce(skb); else if ((outer & INET_ECN_MASK) == INET_ECN_ECT_1) INET_ECN_set_ect1(skb); } return rc; } static inline int IP_ECN_decapsulate(const struct iphdr *oiph, struct sk_buff *skb) { __u8 inner; switch (skb_protocol(skb, true)) { case htons(ETH_P_IP): inner = ip_hdr(skb)->tos; break; case htons(ETH_P_IPV6): inner = ipv6_get_dsfield(ipv6_hdr(skb)); break; default: return 0; } return INET_ECN_decapsulate(skb, oiph->tos, inner); } static inline int IP6_ECN_decapsulate(const struct ipv6hdr *oipv6h, struct sk_buff *skb) { __u8 inner; switch (skb_protocol(skb, true)) { case htons(ETH_P_IP): inner = ip_hdr(skb)->tos; break; case htons(ETH_P_IPV6): inner = ipv6_get_dsfield(ipv6_hdr(skb)); break; default: return 0; } return INET_ECN_decapsulate(skb, ipv6_get_dsfield(oipv6h), inner); } #endif
1 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 /* SPDX-License-Identifier: GPL-2.0 */ #undef TRACE_SYSTEM #define TRACE_SYSTEM random #if !defined(_TRACE_RANDOM_H) || defined(TRACE_HEADER_MULTI_READ) #define _TRACE_RANDOM_H #include <linux/writeback.h> #include <linux/tracepoint.h> TRACE_EVENT(add_device_randomness, TP_PROTO(int bytes, unsigned long IP), TP_ARGS(bytes, IP), TP_STRUCT__entry( __field( int, bytes ) __field(unsigned long, IP ) ), TP_fast_assign( __entry->bytes = bytes; __entry->IP = IP; ), TP_printk("bytes %d caller %pS", __entry->bytes, (void *)__entry->IP) ); DECLARE_EVENT_CLASS(random__mix_pool_bytes, TP_PROTO(const char *pool_name, int bytes, unsigned long IP), TP_ARGS(pool_name, bytes, IP), TP_STRUCT__entry( __field( const char *, pool_name ) __field( int, bytes ) __field(unsigned long, IP ) ), TP_fast_assign( __entry->pool_name = pool_name; __entry->bytes = bytes; __entry->IP = IP; ), TP_printk("%s pool: bytes %d caller %pS", __entry->pool_name, __entry->bytes, (void *)__entry->IP) ); DEFINE_EVENT(random__mix_pool_bytes, mix_pool_bytes, TP_PROTO(const char *pool_name, int bytes, unsigned long IP), TP_ARGS(pool_name, bytes, IP) ); DEFINE_EVENT(random__mix_pool_bytes, mix_pool_bytes_nolock, TP_PROTO(const char *pool_name, int bytes, unsigned long IP), TP_ARGS(pool_name, bytes, IP) ); TRACE_EVENT(credit_entropy_bits, TP_PROTO(const char *pool_name, int bits, int entropy_count, unsigned long IP), TP_ARGS(pool_name, bits, entropy_count, IP), TP_STRUCT__entry( __field( const char *, pool_name ) __field( int, bits ) __field( int, entropy_count ) __field(unsigned long, IP ) ), TP_fast_assign( __entry->pool_name = pool_name; __entry->bits = bits; __entry->entropy_count = entropy_count; __entry->IP = IP; ), TP_printk("%s pool: bits %d entropy_count %d caller %pS", __entry->pool_name, __entry->bits, __entry->entropy_count, (void *)__entry->IP) ); TRACE_EVENT(push_to_pool, TP_PROTO(const char *pool_name, int pool_bits, int input_bits), TP_ARGS(pool_name, pool_bits, input_bits), TP_STRUCT__entry( __field( const char *, pool_name ) __field( int, pool_bits ) __field( int, input_bits ) ), TP_fast_assign( __entry->pool_name = pool_name; __entry->pool_bits = pool_bits; __entry->input_bits = input_bits; ), TP_printk("%s: pool_bits %d input_pool_bits %d", __entry->pool_name, __entry->pool_bits, __entry->input_bits) ); TRACE_EVENT(debit_entropy, TP_PROTO(const char *pool_name, int debit_bits), TP_ARGS(pool_name, debit_bits), TP_STRUCT__entry( __field( const char *, pool_name ) __field( int, debit_bits ) ), TP_fast_assign( __entry->pool_name = pool_name; __entry->debit_bits = debit_bits; ), TP_printk("%s: debit_bits %d", __entry->pool_name, __entry->debit_bits) ); TRACE_EVENT(add_input_randomness, TP_PROTO(int input_bits), TP_ARGS(input_bits), TP_STRUCT__entry( __field( int, input_bits ) ), TP_fast_assign( __entry->input_bits = input_bits; ), TP_printk("input_pool_bits %d", __entry->input_bits) ); TRACE_EVENT(add_disk_randomness, TP_PROTO(dev_t dev, int input_bits), TP_ARGS(dev, input_bits), TP_STRUCT__entry( __field( dev_t, dev ) __field( int, input_bits ) ), TP_fast_assign( __entry->dev = dev; __entry->input_bits = input_bits; ), TP_printk("dev %d,%d input_pool_bits %d", MAJOR(__entry->dev), MINOR(__entry->dev), __entry->input_bits) ); TRACE_EVENT(xfer_secondary_pool, TP_PROTO(const char *pool_name, int xfer_bits, int request_bits, int pool_entropy, int input_entropy), TP_ARGS(pool_name, xfer_bits, request_bits, pool_entropy, input_entropy), TP_STRUCT__entry( __field( const char *, pool_name ) __field( int, xfer_bits ) __field( int, request_bits ) __field( int, pool_entropy ) __field( int, input_entropy ) ), TP_fast_assign( __entry->pool_name = pool_name; __entry->xfer_bits = xfer_bits; __entry->request_bits = request_bits; __entry->pool_entropy = pool_entropy; __entry->input_entropy = input_entropy; ), TP_printk("pool %s xfer_bits %d request_bits %d pool_entropy %d " "input_entropy %d", __entry->pool_name, __entry->xfer_bits, __entry->request_bits, __entry->pool_entropy, __entry->input_entropy) ); DECLARE_EVENT_CLASS(random__get_random_bytes, TP_PROTO(int nbytes, unsigned long IP), TP_ARGS(nbytes, IP), TP_STRUCT__entry( __field( int, nbytes ) __field(unsigned long, IP ) ), TP_fast_assign( __entry->nbytes = nbytes; __entry->IP = IP; ), TP_printk("nbytes %d caller %pS", __entry->nbytes, (void *)__entry->IP) ); DEFINE_EVENT(random__get_random_bytes, get_random_bytes, TP_PROTO(int nbytes, unsigned long IP), TP_ARGS(nbytes, IP) ); DEFINE_EVENT(random__get_random_bytes, get_random_bytes_arch, TP_PROTO(int nbytes, unsigned long IP), TP_ARGS(nbytes, IP) ); DECLARE_EVENT_CLASS(random__extract_entropy, TP_PROTO(const char *pool_name, int nbytes, int entropy_count, unsigned long IP), TP_ARGS(pool_name, nbytes, entropy_count, IP), TP_STRUCT__entry( __field( const char *, pool_name ) __field( int, nbytes ) __field( int, entropy_count ) __field(unsigned long, IP ) ), TP_fast_assign( __entry->pool_name = pool_name; __entry->nbytes = nbytes; __entry->entropy_count = entropy_count; __entry->IP = IP; ), TP_printk("%s pool: nbytes %d entropy_count %d caller %pS", __entry->pool_name, __entry->nbytes, __entry->entropy_count, (void *)__entry->IP) ); DEFINE_EVENT(random__extract_entropy, extract_entropy, TP_PROTO(const char *pool_name, int nbytes, int entropy_count, unsigned long IP), TP_ARGS(pool_name, nbytes, entropy_count, IP) ); DEFINE_EVENT(random__extract_entropy, extract_entropy_user, TP_PROTO(const char *pool_name, int nbytes, int entropy_count, unsigned long IP), TP_ARGS(pool_name, nbytes, entropy_count, IP) ); TRACE_EVENT(random_read, TP_PROTO(int got_bits, int need_bits, int pool_left, int input_left), TP_ARGS(got_bits, need_bits, pool_left, input_left), TP_STRUCT__entry( __field( int, got_bits ) __field( int, need_bits ) __field( int, pool_left ) __field( int, input_left ) ), TP_fast_assign( __entry->got_bits = got_bits; __entry->need_bits = need_bits; __entry->pool_left = pool_left; __entry->input_left = input_left; ), TP_printk("got_bits %d still_needed_bits %d " "blocking_pool_entropy_left %d input_entropy_left %d", __entry->got_bits, __entry->got_bits, __entry->pool_left, __entry->input_left) ); TRACE_EVENT(urandom_read, TP_PROTO(int got_bits, int pool_left, int input_left), TP_ARGS(got_bits, pool_left, input_left), TP_STRUCT__entry( __field( int, got_bits ) __field( int, pool_left ) __field( int, input_left ) ), TP_fast_assign( __entry->got_bits = got_bits; __entry->pool_left = pool_left; __entry->input_left = input_left; ), TP_printk("got_bits %d nonblocking_pool_entropy_left %d " "input_entropy_left %d", __entry->got_bits, __entry->pool_left, __entry->input_left) ); TRACE_EVENT(prandom_u32, TP_PROTO(unsigned int ret), TP_ARGS(ret), TP_STRUCT__entry( __field( unsigned int, ret) ), TP_fast_assign( __entry->ret = ret; ), TP_printk("ret=%u" , __entry->ret) ); #endif /* _TRACE_RANDOM_H */ /* This part must be outside protection */ #include <trace/define_trace.h>
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 /* SPDX-License-Identifier: GPL-2.0 */ /* * include/linux/pagevec.h * * In many places it is efficient to batch an operation up against multiple * pages. A pagevec is a multipage container which is used for that. */ #ifndef _LINUX_PAGEVEC_H #define _LINUX_PAGEVEC_H #include <linux/xarray.h> /* 15 pointers + header align the pagevec structure to a power of two */ #define PAGEVEC_SIZE 15 struct page; struct address_space; struct pagevec { unsigned char nr; bool percpu_pvec_drained; struct page *pages[PAGEVEC_SIZE]; }; void __pagevec_release(struct pagevec *pvec); void __pagevec_lru_add(struct pagevec *pvec); unsigned pagevec_lookup_entries(struct pagevec *pvec, struct address_space *mapping, pgoff_t start, unsigned nr_entries, pgoff_t *indices); void pagevec_remove_exceptionals(struct pagevec *pvec); unsigned pagevec_lookup_range(struct pagevec *pvec, struct address_space *mapping, pgoff_t *start, pgoff_t end); static inline unsigned pagevec_lookup(struct pagevec *pvec, struct address_space *mapping, pgoff_t *start) { return pagevec_lookup_range(pvec, mapping, start, (pgoff_t)-1); } unsigned pagevec_lookup_range_tag(struct pagevec *pvec, struct address_space *mapping, pgoff_t *index, pgoff_t end, xa_mark_t tag); unsigned pagevec_lookup_range_nr_tag(struct pagevec *pvec, struct address_space *mapping, pgoff_t *index, pgoff_t end, xa_mark_t tag, unsigned max_pages); static inline unsigned pagevec_lookup_tag(struct pagevec *pvec, struct address_space *mapping, pgoff_t *index, xa_mark_t tag) { return pagevec_lookup_range_tag(pvec, mapping, index, (pgoff_t)-1, tag); } static inline void pagevec_init(struct pagevec *pvec) { pvec->nr = 0; pvec->percpu_pvec_drained = false; } static inline void pagevec_reinit(struct pagevec *pvec) { pvec->nr = 0; } static inline unsigned pagevec_count(struct pagevec *pvec) { return pvec->nr; } static inline unsigned pagevec_space(struct pagevec *pvec) { return PAGEVEC_SIZE - pvec->nr; } /* * Add a page to a pagevec. Returns the number of slots still available. */ static inline unsigned pagevec_add(struct pagevec *pvec, struct page *page) { pvec->pages[pvec->nr++] = page; return pagevec_space(pvec); } static inline void pagevec_release(struct pagevec *pvec) { if (pagevec_count(pvec)) __pagevec_release(pvec); } #endif /* _LINUX_PAGEVEC_H */
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 /* SPDX-License-Identifier: GPL-2.0 */ #ifndef _ASM_X86_PGALLOC_H #define _ASM_X86_PGALLOC_H #include <linux/threads.h> #include <linux/mm.h> /* for struct page */ #include <linux/pagemap.h> #define __HAVE_ARCH_PTE_ALLOC_ONE #define __HAVE_ARCH_PGD_FREE #include <asm-generic/pgalloc.h> static inline int __paravirt_pgd_alloc(struct mm_struct *mm) { return 0; } #ifdef CONFIG_PARAVIRT_XXL #include <asm/paravirt.h> #else #define paravirt_pgd_alloc(mm) __paravirt_pgd_alloc(mm) static inline void paravirt_pgd_free(struct mm_struct *mm, pgd_t *pgd) {} static inline void paravirt_alloc_pte(struct mm_struct *mm, unsigned long pfn) {} static inline void paravirt_alloc_pmd(struct mm_struct *mm, unsigned long pfn) {} static inline void paravirt_alloc_pmd_clone(unsigned long pfn, unsigned long clonepfn, unsigned long start, unsigned long count) {} static inline void paravirt_alloc_pud(struct mm_struct *mm, unsigned long pfn) {} static inline void paravirt_alloc_p4d(struct mm_struct *mm, unsigned long pfn) {} static inline void paravirt_release_pte(unsigned long pfn) {} static inline void paravirt_release_pmd(unsigned long pfn) {} static inline void paravirt_release_pud(unsigned long pfn) {} static inline void paravirt_release_p4d(unsigned long pfn) {} #endif /* * Flags to use when allocating a user page table page. */ extern gfp_t __userpte_alloc_gfp; #ifdef CONFIG_PAGE_TABLE_ISOLATION /* * Instead of one PGD, we acquire two PGDs. Being order-1, it is * both 8k in size and 8k-aligned. That lets us just flip bit 12 * in a pointer to swap between the two 4k halves. */ #define PGD_ALLOCATION_ORDER 1 #else #define PGD_ALLOCATION_ORDER 0 #endif /* * Allocate and free page tables. */ extern pgd_t *pgd_alloc(struct mm_struct *); extern void pgd_free(struct mm_struct *mm, pgd_t *pgd); extern pgtable_t pte_alloc_one(struct mm_struct *); extern void ___pte_free_tlb(struct mmu_gather *tlb, struct page *pte); static inline void __pte_free_tlb(struct mmu_gather *tlb, struct page *pte, unsigned long address) { ___pte_free_tlb(tlb, pte); } static inline void pmd_populate_kernel(struct mm_struct *mm, pmd_t *pmd, pte_t *pte) { paravirt_alloc_pte(mm, __pa(pte) >> PAGE_SHIFT); set_pmd(pmd, __pmd(__pa(pte) | _PAGE_TABLE)); } static inline void pmd_populate_kernel_safe(struct mm_struct *mm, pmd_t *pmd, pte_t *pte) { paravirt_alloc_pte(mm, __pa(pte) >> PAGE_SHIFT); set_pmd_safe(pmd, __pmd(__pa(pte) | _PAGE_TABLE)); } static inline void pmd_populate(struct mm_struct *mm, pmd_t *pmd, struct page *pte) { unsigned long pfn = page_to_pfn(pte); paravirt_alloc_pte(mm, pfn); set_pmd(pmd, __pmd(((pteval_t)pfn << PAGE_SHIFT) | _PAGE_TABLE)); } #define pmd_pgtable(pmd) pmd_page(pmd) #if CONFIG_PGTABLE_LEVELS > 2 extern void ___pmd_free_tlb(struct mmu_gather *tlb, pmd_t *pmd); static inline void __pmd_free_tlb(struct mmu_gather *tlb, pmd_t *pmd, unsigned long address) { ___pmd_free_tlb(tlb, pmd); } #ifdef CONFIG_X86_PAE extern void pud_populate(struct mm_struct *mm, pud_t *pudp, pmd_t *pmd); #else /* !CONFIG_X86_PAE */ static inline void pud_populate(struct mm_struct *mm, pud_t *pud, pmd_t *pmd) { paravirt_alloc_pmd(mm, __pa(pmd) >> PAGE_SHIFT); set_pud(pud, __pud(_PAGE_TABLE | __pa(pmd))); } static inline void pud_populate_safe(struct mm_struct *mm, pud_t *pud, pmd_t *pmd) { paravirt_alloc_pmd(mm, __pa(pmd) >> PAGE_SHIFT); set_pud_safe(pud, __pud(_PAGE_TABLE | __pa(pmd))); } #endif /* CONFIG_X86_PAE */ #if CONFIG_PGTABLE_LEVELS > 3 static inline void p4d_populate(struct mm_struct *mm, p4d_t *p4d, pud_t *pud) { paravirt_alloc_pud(mm, __pa(pud) >> PAGE_SHIFT); set_p4d(p4d, __p4d(_PAGE_TABLE | __pa(pud))); } static inline void p4d_populate_safe(struct mm_struct *mm, p4d_t *p4d, pud_t *pud) { paravirt_alloc_pud(mm, __pa(pud) >> PAGE_SHIFT); set_p4d_safe(p4d, __p4d(_PAGE_TABLE | __pa(pud))); } extern void ___pud_free_tlb(struct mmu_gather *tlb, pud_t *pud); static inline void __pud_free_tlb(struct mmu_gather *tlb, pud_t *pud, unsigned long address) { ___pud_free_tlb(tlb, pud); } #if CONFIG_PGTABLE_LEVELS > 4 static inline void pgd_populate(struct mm_struct *mm, pgd_t *pgd, p4d_t *p4d) { if (!pgtable_l5_enabled()) return; paravirt_alloc_p4d(mm, __pa(p4d) >> PAGE_SHIFT); set_pgd(pgd, __pgd(_PAGE_TABLE | __pa(p4d))); } static inline void pgd_populate_safe(struct mm_struct *mm, pgd_t *pgd, p4d_t *p4d) { if (!pgtable_l5_enabled()) return; paravirt_alloc_p4d(mm, __pa(p4d) >> PAGE_SHIFT); set_pgd_safe(pgd, __pgd(_PAGE_TABLE | __pa(p4d))); } static inline p4d_t *p4d_alloc_one(struct mm_struct *mm, unsigned long addr) { gfp_t gfp = GFP_KERNEL_ACCOUNT; if (mm == &init_mm) gfp &= ~__GFP_ACCOUNT; return (p4d_t *)get_zeroed_page(gfp); } static inline void p4d_free(struct mm_struct *mm, p4d_t *p4d) { if (!pgtable_l5_enabled()) return; BUG_ON((unsigned long)p4d & (PAGE_SIZE-1)); free_page((unsigned long)p4d); } extern void ___p4d_free_tlb(struct mmu_gather *tlb, p4d_t *p4d); static inline void __p4d_free_tlb(struct mmu_gather *tlb, p4d_t *p4d, unsigned long address) { if (pgtable_l5_enabled()) ___p4d_free_tlb(tlb, p4d); } #endif /* CONFIG_PGTABLE_LEVELS > 4 */ #endif /* CONFIG_PGTABLE_LEVELS > 3 */ #endif /* CONFIG_PGTABLE_LEVELS > 2 */ #endif /* _ASM_X86_PGALLOC_H */
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 /* SPDX-License-Identifier: GPL-2.0 */ /* rwsem.h: R/W semaphores, public interface * * Written by David Howells (dhowells@redhat.com). * Derived from asm-i386/semaphore.h */ #ifndef _LINUX_RWSEM_H #define _LINUX_RWSEM_H #include <linux/linkage.h> #include <linux/types.h> #include <linux/kernel.h> #include <linux/list.h> #include <linux/spinlock.h> #include <linux/atomic.h> #include <linux/err.h> #ifdef CONFIG_RWSEM_SPIN_ON_OWNER #include <linux/osq_lock.h> #endif /* * For an uncontended rwsem, count and owner are the only fields a task * needs to touch when acquiring the rwsem. So they are put next to each * other to increase the chance that they will share the same cacheline. * * In a contended rwsem, the owner is likely the most frequently accessed * field in the structure as the optimistic waiter that holds the osq lock * will spin on owner. For an embedded rwsem, other hot fields in the * containing structure should be moved further away from the rwsem to * reduce the chance that they will share the same cacheline causing * cacheline bouncing problem. */ struct rw_semaphore { atomic_long_t count; /* * Write owner or one of the read owners as well flags regarding * the current state of the rwsem. Can be used as a speculative * check to see if the write owner is running on the cpu. */ atomic_long_t owner; #ifdef CONFIG_RWSEM_SPIN_ON_OWNER struct optimistic_spin_queue osq; /* spinner MCS lock */ #endif raw_spinlock_t wait_lock; struct list_head wait_list; #ifdef CONFIG_DEBUG_RWSEMS void *magic; #endif #ifdef CONFIG_DEBUG_LOCK_ALLOC struct lockdep_map dep_map; #endif }; /* In all implementations count != 0 means locked */ static inline int rwsem_is_locked(struct rw_semaphore *sem) { return atomic_long_read(&sem->count) != 0; } #define RWSEM_UNLOCKED_VALUE 0L #define __RWSEM_COUNT_INIT(name) .count = ATOMIC_LONG_INIT(RWSEM_UNLOCKED_VALUE) /* Common initializer macros and functions */ #ifdef CONFIG_DEBUG_LOCK_ALLOC # define __RWSEM_DEP_MAP_INIT(lockname) \ .dep_map = { \ .name = #lockname, \ .wait_type_inner = LD_WAIT_SLEEP, \ }, #else # define __RWSEM_DEP_MAP_INIT(lockname) #endif #ifdef CONFIG_DEBUG_RWSEMS # define __RWSEM_DEBUG_INIT(lockname) .magic = &lockname, #else # define __RWSEM_DEBUG_INIT(lockname) #endif #ifdef CONFIG_RWSEM_SPIN_ON_OWNER #define __RWSEM_OPT_INIT(lockname) .osq = OSQ_LOCK_UNLOCKED, #else #define __RWSEM_OPT_INIT(lockname) #endif #define __RWSEM_INITIALIZER(name) \ { __RWSEM_COUNT_INIT(name), \ .owner = ATOMIC_LONG_INIT(0), \ __RWSEM_OPT_INIT(name) \ .wait_lock = __RAW_SPIN_LOCK_UNLOCKED(name.wait_lock),\ .wait_list = LIST_HEAD_INIT((name).wait_list), \ __RWSEM_DEBUG_INIT(name) \ __RWSEM_DEP_MAP_INIT(name) } #define DECLARE_RWSEM(name) \ struct rw_semaphore name = __RWSEM_INITIALIZER(name) extern void __init_rwsem(struct rw_semaphore *sem, const char *name, struct lock_class_key *key); #define init_rwsem(sem) \ do { \ static struct lock_class_key __key; \ \ __init_rwsem((sem), #sem, &__key); \ } while (0) /* * This is the same regardless of which rwsem implementation that is being used. * It is just a heuristic meant to be called by somebody alreadying holding the * rwsem to see if somebody from an incompatible type is wanting access to the * lock. */ static inline int rwsem_is_contended(struct rw_semaphore *sem) { return !list_empty(&sem->wait_list); } /* * lock for reading */ extern void down_read(struct rw_semaphore *sem); extern int __must_check down_read_interruptible(struct rw_semaphore *sem); extern int __must_check down_read_killable(struct rw_semaphore *sem); /* * trylock for reading -- returns 1 if successful, 0 if contention */ extern int down_read_trylock(struct rw_semaphore *sem); /* * lock for writing */ extern void down_write(struct rw_semaphore *sem); extern int __must_check down_write_killable(struct rw_semaphore *sem); /* * trylock for writing -- returns 1 if successful, 0 if contention */ extern int down_write_trylock(struct rw_semaphore *sem); /* * release a read lock */ extern void up_read(struct rw_semaphore *sem); /* * release a write lock */ extern void up_write(struct rw_semaphore *sem); /* * downgrade write lock to read lock */ extern void downgrade_write(struct rw_semaphore *sem); #ifdef CONFIG_DEBUG_LOCK_ALLOC /* * nested locking. NOTE: rwsems are not allowed to recurse * (which occurs if the same task tries to acquire the same * lock instance multiple times), but multiple locks of the * same lock class might be taken, if the order of the locks * is always the same. This ordering rule can be expressed * to lockdep via the _nested() APIs, but enumerating the * subclasses that are used. (If the nesting relationship is * static then another method for expressing nested locking is * the explicit definition of lock class keys and the use of * lockdep_set_class() at lock initialization time. * See Documentation/locking/lockdep-design.rst for more details.) */ extern void down_read_nested(struct rw_semaphore *sem, int subclass); extern int __must_check down_read_killable_nested(struct rw_semaphore *sem, int subclass); extern void down_write_nested(struct rw_semaphore *sem, int subclass); extern int down_write_killable_nested(struct rw_semaphore *sem, int subclass); extern void _down_write_nest_lock(struct rw_semaphore *sem, struct lockdep_map *nest_lock); # define down_write_nest_lock(sem, nest_lock) \ do { \ typecheck(struct lockdep_map *, &(nest_lock)->dep_map); \ _down_write_nest_lock(sem, &(nest_lock)->dep_map); \ } while (0); /* * Take/release a lock when not the owner will release it. * * [ This API should be avoided as much as possible - the * proper abstraction for this case is completions. ] */ extern void down_read_non_owner(struct rw_semaphore *sem); extern void up_read_non_owner(struct rw_semaphore *sem); #else # define down_read_nested(sem, subclass) down_read(sem) # define down_read_killable_nested(sem, subclass) down_read_killable(sem) # define down_write_nest_lock(sem, nest_lock) down_write(sem) # define down_write_nested(sem, subclass) down_write(sem) # define down_write_killable_nested(sem, subclass) down_write_killable(sem) # define down_read_non_owner(sem) down_read(sem) # define up_read_non_owner(sem) up_read(sem) #endif #endif /* _LINUX_RWSEM_H */
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 /* SPDX-License-Identifier: GPL-2.0 */ #undef TRACE_SYSTEM #define TRACE_SYSTEM filemap #if !defined(_TRACE_FILEMAP_H) || defined(TRACE_HEADER_MULTI_READ) #define _TRACE_FILEMAP_H #include <linux/types.h> #include <linux/tracepoint.h> #include <linux/mm.h> #include <linux/memcontrol.h> #include <linux/device.h> #include <linux/kdev_t.h> #include <linux/errseq.h> DECLARE_EVENT_CLASS(mm_filemap_op_page_cache, TP_PROTO(struct page *page), TP_ARGS(page), TP_STRUCT__entry( __field(unsigned long, pfn) __field(unsigned long, i_ino) __field(unsigned long, index) __field(dev_t, s_dev) ), TP_fast_assign( __entry->pfn = page_to_pfn(page); __entry->i_ino = page->mapping->host->i_ino; __entry->index = page->index; if (page->mapping->host->i_sb) __entry->s_dev = page->mapping->host->i_sb->s_dev; else __entry->s_dev = page->mapping->host->i_rdev; ), TP_printk("dev %d:%d ino %lx page=%p pfn=%lu ofs=%lu", MAJOR(__entry->s_dev), MINOR(__entry->s_dev), __entry->i_ino, pfn_to_page(__entry->pfn), __entry->pfn, __entry->index << PAGE_SHIFT) ); DEFINE_EVENT(mm_filemap_op_page_cache, mm_filemap_delete_from_page_cache, TP_PROTO(struct page *page), TP_ARGS(page) ); DEFINE_EVENT(mm_filemap_op_page_cache, mm_filemap_add_to_page_cache, TP_PROTO(struct page *page), TP_ARGS(page) ); TRACE_EVENT(filemap_set_wb_err, TP_PROTO(struct address_space *mapping, errseq_t eseq), TP_ARGS(mapping, eseq), TP_STRUCT__entry( __field(unsigned long, i_ino) __field(dev_t, s_dev) __field(errseq_t, errseq) ), TP_fast_assign( __entry->i_ino = mapping->host->i_ino; __entry->errseq = eseq; if (mapping->host->i_sb) __entry->s_dev = mapping->host->i_sb->s_dev; else __entry->s_dev = mapping->host->i_rdev; ), TP_printk("dev=%d:%d ino=0x%lx errseq=0x%x", MAJOR(__entry->s_dev), MINOR(__entry->s_dev), __entry->i_ino, __entry->errseq) ); TRACE_EVENT(file_check_and_advance_wb_err, TP_PROTO(struct file *file, errseq_t old), TP_ARGS(file, old), TP_STRUCT__entry( __field(struct file *, file) __field(unsigned long, i_ino) __field(dev_t, s_dev) __field(errseq_t, old) __field(errseq_t, new) ), TP_fast_assign( __entry->file = file; __entry->i_ino = file->f_mapping->host->i_ino; if (file->f_mapping->host->i_sb) __entry->s_dev = file->f_mapping->host->i_sb->s_dev; else __entry->s_dev = file->f_mapping->host->i_rdev; __entry->old = old; __entry->new = file->f_wb_err; ), TP_printk("file=%p dev=%d:%d ino=0x%lx old=0x%x new=0x%x", __entry->file, MAJOR(__entry->s_dev), MINOR(__entry->s_dev), __entry->i_ino, __entry->old, __entry->new) ); #endif /* _TRACE_FILEMAP_H */ /* This part must be outside protection */ #include <trace/define_trace.h>
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 /* SPDX-License-Identifier: GPL-2.0 */ #ifndef _ASM_X86_ATOMIC_H #define _ASM_X86_ATOMIC_H #include <linux/compiler.h> #include <linux/types.h> #include <asm/alternative.h> #include <asm/cmpxchg.h> #include <asm/rmwcc.h> #include <asm/barrier.h> /* * Atomic operations that C can't guarantee us. Useful for * resource counting etc.. */ /** * arch_atomic_read - read atomic variable * @v: pointer of type atomic_t * * Atomically reads the value of @v. */ static __always_inline int arch_atomic_read(const atomic_t *v) { /* * Note for KASAN: we deliberately don't use READ_ONCE_NOCHECK() here, * it's non-inlined function that increases binary size and stack usage. */ return __READ_ONCE((v)->counter); } /** * arch_atomic_set - set atomic variable * @v: pointer of type atomic_t * @i: required value * * Atomically sets the value of @v to @i. */ static __always_inline void arch_atomic_set(atomic_t *v, int i) { __WRITE_ONCE(v->counter, i); } /** * arch_atomic_add - add integer to atomic variable * @i: integer value to add * @v: pointer of type atomic_t * * Atomically adds @i to @v. */ static __always_inline void arch_atomic_add(int i, atomic_t *v) { asm volatile(LOCK_PREFIX "addl %1,%0" : "+m" (v->counter) : "ir" (i) : "memory"); } /** * arch_atomic_sub - subtract integer from atomic variable * @i: integer value to subtract * @v: pointer of type atomic_t * * Atomically subtracts @i from @v. */ static __always_inline void arch_atomic_sub(int i, atomic_t *v) { asm volatile(LOCK_PREFIX "subl %1,%0" : "+m" (v->counter) : "ir" (i) : "memory"); } /** * arch_atomic_sub_and_test - subtract value from variable and test result * @i: integer value to subtract * @v: pointer of type atomic_t * * Atomically subtracts @i from @v and returns * true if the result is zero, or false for all * other cases. */ static __always_inline bool arch_atomic_sub_and_test(int i, atomic_t *v) { return GEN_BINARY_RMWcc(LOCK_PREFIX "subl", v->counter, e, "er", i); } #define arch_atomic_sub_and_test arch_atomic_sub_and_test /** * arch_atomic_inc - increment atomic variable * @v: pointer of type atomic_t * * Atomically increments @v by 1. */ static __always_inline void arch_atomic_inc(atomic_t *v) { asm volatile(LOCK_PREFIX "incl %0" : "+m" (v->counter) :: "memory"); } #define arch_atomic_inc arch_atomic_inc /** * arch_atomic_dec - decrement atomic variable * @v: pointer of type atomic_t * * Atomically decrements @v by 1. */ static __always_inline void arch_atomic_dec(atomic_t *v) { asm volatile(LOCK_PREFIX "decl %0" : "+m" (v->counter) :: "memory"); } #define arch_atomic_dec arch_atomic_dec /** * arch_atomic_dec_and_test - decrement and test * @v: pointer of type atomic_t * * Atomically decrements @v by 1 and * returns true if the result is 0, or false for all other * cases. */ static __always_inline bool arch_atomic_dec_and_test(atomic_t *v) { return GEN_UNARY_RMWcc(LOCK_PREFIX "decl", v->counter, e); } #define arch_atomic_dec_and_test arch_atomic_dec_and_test /** * arch_atomic_inc_and_test - increment and test * @v: pointer of type atomic_t * * Atomically increments @v by 1 * and returns true if the result is zero, or false for all * other cases. */ static __always_inline bool arch_atomic_inc_and_test(atomic_t *v) { return GEN_UNARY_RMWcc(LOCK_PREFIX "incl", v->counter, e); } #define arch_atomic_inc_and_test arch_atomic_inc_and_test /** * arch_atomic_add_negative - add and test if negative * @i: integer value to add * @v: pointer of type atomic_t * * Atomically adds @i to @v and returns true * if the result is negative, or false when * result is greater than or equal to zero. */ static __always_inline bool arch_atomic_add_negative(int i, atomic_t *v) { return GEN_BINARY_RMWcc(LOCK_PREFIX "addl", v->counter, s, "er", i); } #define arch_atomic_add_negative arch_atomic_add_negative /** * arch_atomic_add_return - add integer and return * @i: integer value to add * @v: pointer of type atomic_t * * Atomically adds @i to @v and returns @i + @v */ static __always_inline int arch_atomic_add_return(int i, atomic_t *v) { return i + xadd(&v->counter, i); } #define arch_atomic_add_return arch_atomic_add_return /** * arch_atomic_sub_return - subtract integer and return * @v: pointer of type atomic_t * @i: integer value to subtract * * Atomically subtracts @i from @v and returns @v - @i */ static __always_inline int arch_atomic_sub_return(int i, atomic_t *v) { return arch_atomic_add_return(-i, v); } #define arch_atomic_sub_return arch_atomic_sub_return static __always_inline int arch_atomic_fetch_add(int i, atomic_t *v) { return xadd(&v->counter, i); } #define arch_atomic_fetch_add arch_atomic_fetch_add static __always_inline int arch_atomic_fetch_sub(int i, atomic_t *v) { return xadd(&v->counter, -i); } #define arch_atomic_fetch_sub arch_atomic_fetch_sub static __always_inline int arch_atomic_cmpxchg(atomic_t *v, int old, int new) { return arch_cmpxchg(&v->counter, old, new); } #define arch_atomic_cmpxchg arch_atomic_cmpxchg static __always_inline bool arch_atomic_try_cmpxchg(atomic_t *v, int *old, int new) { return try_cmpxchg(&v->counter, old, new); } #define arch_atomic_try_cmpxchg arch_atomic_try_cmpxchg static __always_inline int arch_atomic_xchg(atomic_t *v, int new) { return arch_xchg(&v->counter, new); } #define arch_atomic_xchg arch_atomic_xchg static __always_inline void arch_atomic_and(int i, atomic_t *v) { asm volatile(LOCK_PREFIX "andl %1,%0" : "+m" (v->counter) : "ir" (i) : "memory"); } static __always_inline int arch_atomic_fetch_and(int i, atomic_t *v) { int val = arch_atomic_read(v); do { } while (!arch_atomic_try_cmpxchg(v, &val, val & i)); return val; } #define arch_atomic_fetch_and arch_atomic_fetch_and static __always_inline void arch_atomic_or(int i, atomic_t *v) { asm volatile(LOCK_PREFIX "orl %1,%0" : "+m" (v->counter) : "ir" (i) : "memory"); } static __always_inline int arch_atomic_fetch_or(int i, atomic_t *v) { int val = arch_atomic_read(v); do { } while (!arch_atomic_try_cmpxchg(v, &val, val | i)); return val; } #define arch_atomic_fetch_or arch_atomic_fetch_or static __always_inline void arch_atomic_xor(int i, atomic_t *v) { asm volatile(LOCK_PREFIX "xorl %1,%0" : "+m" (v->counter) : "ir" (i) : "memory"); } static __always_inline int arch_atomic_fetch_xor(int i, atomic_t *v) { int val = arch_atomic_read(v); do { } while (!arch_atomic_try_cmpxchg(v, &val, val ^ i)); return val; } #define arch_atomic_fetch_xor arch_atomic_fetch_xor #ifdef CONFIG_X86_32 # include <asm/atomic64_32.h> #else # include <asm/atomic64_64.h> #endif #define ARCH_ATOMIC #endif /* _ASM_X86_ATOMIC_H */
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 /* SPDX-License-Identifier: GPL-2.0 */ #ifndef _LINUX_SCHED_CLOCK_H #define _LINUX_SCHED_CLOCK_H #include <linux/smp.h> /* * Do not use outside of architecture code which knows its limitations. * * sched_clock() has no promise of monotonicity or bounded drift between * CPUs, use (which you should not) requires disabling IRQs. * * Please use one of the three interfaces below. */ extern unsigned long long notrace sched_clock(void); /* * See the comment in kernel/sched/clock.c */ extern u64 running_clock(void); extern u64 sched_clock_cpu(int cpu); extern void sched_clock_init(void); #ifndef CONFIG_HAVE_UNSTABLE_SCHED_CLOCK static inline void sched_clock_tick(void) { } static inline void clear_sched_clock_stable(void) { } static inline void sched_clock_idle_sleep_event(void) { } static inline void sched_clock_idle_wakeup_event(void) { } static inline u64 cpu_clock(int cpu) { return sched_clock(); } static inline u64 local_clock(void) { return sched_clock(); } #else extern int sched_clock_stable(void); extern void clear_sched_clock_stable(void); /* * When sched_clock_stable(), __sched_clock_offset provides the offset * between local_clock() and sched_clock(). */ extern u64 __sched_clock_offset; extern void sched_clock_tick(void); extern void sched_clock_tick_stable(void); extern void sched_clock_idle_sleep_event(void); extern void sched_clock_idle_wakeup_event(void); /* * As outlined in clock.c, provides a fast, high resolution, nanosecond * time source that is monotonic per cpu argument and has bounded drift * between cpus. * * ######################### BIG FAT WARNING ########################## * # when comparing cpu_clock(i) to cpu_clock(j) for i != j, time can # * # go backwards !! # * #################################################################### */ static inline u64 cpu_clock(int cpu) { return sched_clock_cpu(cpu); } static inline u64 local_clock(void) { return sched_clock_cpu(raw_smp_processor_id()); } #endif #ifdef CONFIG_IRQ_TIME_ACCOUNTING /* * An i/f to runtime opt-in for irq time accounting based off of sched_clock. * The reason for this explicit opt-in is not to have perf penalty with * slow sched_clocks. */ extern void enable_sched_clock_irqtime(void); extern void disable_sched_clock_irqtime(void); #else static inline void enable_sched_clock_irqtime(void) {} static inline void disable_sched_clock_irqtime(void) {} #endif #endif /* _LINUX_SCHED_CLOCK_H */
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946 947 948 949 950 951 952 953 954 955 956 957 958 959 960 961 962 963 964 965 966 967 968 969 970 971 972 973 974 975 976 977 978 979 980 981 982 983 984 985 986 987 988 989 990 991 992 993 994 995 996 997 998 999 1000 1001 1002 1003 1004 1005 1006 1007 1008 1009 1010 1011 1012 1013 1014 1015 1016 1017 1018 1019 1020 1021 1022 1023 1024 1025 1026 1027 1028 1029 1030 1031 1032 1033 1034 1035 1036 1037 1038 1039 1040 1041 1042 1043 1044 1045 1046 1047 1048 1049 1050 1051 1052 1053 1054 1055 1056 1057 1058 1059 1060 1061 1062 1063 1064 1065 1066 1067 1068 1069 1070 1071 1072 1073 1074 1075 1076 1077 1078 1079 1080 1081 1082 1083 1084 1085 1086 1087 1088 1089 1090 1091 1092 1093 1094 1095 1096 1097 1098 1099 1100 1101 1102 1103 1104 1105 1106 1107 1108 1109 1110 1111 1112 1113 1114 1115 1116 1117 1118 1119 1120 1121 1122 1123 1124 1125 1126 1127 1128 1129 1130 1131 1132 1133 1134 1135 1136 1137 1138 1139 1140 /* SPDX-License-Identifier: GPL-2.0-or-later */ /* * Copyright 2003-2004 Red Hat, Inc. All rights reserved. * Copyright 2003-2004 Jeff Garzik * * libata documentation is available via 'make {ps|pdf}docs', * as Documentation/driver-api/libata.rst * * Hardware documentation available from http://www.t13.org/ */ #ifndef __LINUX_ATA_H__ #define __LINUX_ATA_H__ #include <linux/kernel.h> #include <linux/string.h> #include <linux/types.h> #include <asm/byteorder.h> /* defines only for the constants which don't work well as enums */ #define ATA_DMA_BOUNDARY 0xffffUL #define ATA_DMA_MASK 0xffffffffULL enum { /* various global constants */ ATA_MAX_DEVICES = 2, /* per bus/port */ ATA_MAX_PRD = 256, /* we could make these 256/256 */ ATA_SECT_SIZE = 512, ATA_MAX_SECTORS_128 = 128, ATA_MAX_SECTORS = 256, ATA_MAX_SECTORS_1024 = 1024, ATA_MAX_SECTORS_LBA48 = 65535,/* avoid count to be 0000h */ ATA_MAX_SECTORS_TAPE = 65535, ATA_MAX_TRIM_RNUM = 64, /* 512-byte payload / (6-byte LBA + 2-byte range per entry) */ ATA_ID_WORDS = 256, ATA_ID_CONFIG = 0, ATA_ID_CYLS = 1, ATA_ID_HEADS = 3, ATA_ID_SECTORS = 6, ATA_ID_SERNO = 10, ATA_ID_BUF_SIZE = 21, ATA_ID_FW_REV = 23, ATA_ID_PROD = 27, ATA_ID_MAX_MULTSECT = 47, ATA_ID_DWORD_IO = 48, /* before ATA-8 */ ATA_ID_TRUSTED = 48, /* ATA-8 and later */ ATA_ID_CAPABILITY = 49, ATA_ID_OLD_PIO_MODES = 51, ATA_ID_OLD_DMA_MODES = 52, ATA_ID_FIELD_VALID = 53, ATA_ID_CUR_CYLS = 54, ATA_ID_CUR_HEADS = 55, ATA_ID_CUR_SECTORS = 56, ATA_ID_MULTSECT = 59, ATA_ID_LBA_CAPACITY = 60, ATA_ID_SWDMA_MODES = 62, ATA_ID_MWDMA_MODES = 63, ATA_ID_PIO_MODES = 64, ATA_ID_EIDE_DMA_MIN = 65, ATA_ID_EIDE_DMA_TIME = 66, ATA_ID_EIDE_PIO = 67, ATA_ID_EIDE_PIO_IORDY = 68, ATA_ID_ADDITIONAL_SUPP = 69, ATA_ID_QUEUE_DEPTH = 75, ATA_ID_SATA_CAPABILITY = 76, ATA_ID_SATA_CAPABILITY_2 = 77, ATA_ID_FEATURE_SUPP = 78, ATA_ID_MAJOR_VER = 80, ATA_ID_COMMAND_SET_1 = 82, ATA_ID_COMMAND_SET_2 = 83, ATA_ID_CFSSE = 84, ATA_ID_CFS_ENABLE_1 = 85, ATA_ID_CFS_ENABLE_2 = 86, ATA_ID_CSF_DEFAULT = 87, ATA_ID_UDMA_MODES = 88, ATA_ID_HW_CONFIG = 93, ATA_ID_SPG = 98, ATA_ID_LBA_CAPACITY_2 = 100, ATA_ID_SECTOR_SIZE = 106, ATA_ID_WWN = 108, ATA_ID_LOGICAL_SECTOR_SIZE = 117, /* and 118 */ ATA_ID_COMMAND_SET_3 = 119, ATA_ID_COMMAND_SET_4 = 120, ATA_ID_LAST_LUN = 126, ATA_ID_DLF = 128, ATA_ID_CSFO = 129, ATA_ID_CFA_POWER = 160, ATA_ID_CFA_KEY_MGMT = 162, ATA_ID_CFA_MODES = 163, ATA_ID_DATA_SET_MGMT = 169, ATA_ID_SCT_CMD_XPORT = 206, ATA_ID_ROT_SPEED = 217, ATA_ID_PIO4 = (1 << 1), ATA_ID_SERNO_LEN = 20, ATA_ID_FW_REV_LEN = 8, ATA_ID_PROD_LEN = 40, ATA_ID_WWN_LEN = 8, ATA_PCI_CTL_OFS = 2, ATA_PIO0 = (1 << 0), ATA_PIO1 = ATA_PIO0 | (1 << 1), ATA_PIO2 = ATA_PIO1 | (1 << 2), ATA_PIO3 = ATA_PIO2 | (1 << 3), ATA_PIO4 = ATA_PIO3 | (1 << 4), ATA_PIO5 = ATA_PIO4 | (1 << 5), ATA_PIO6 = ATA_PIO5 | (1 << 6), ATA_PIO4_ONLY = (1 << 4), ATA_SWDMA0 = (1 << 0), ATA_SWDMA1 = ATA_SWDMA0 | (1 << 1), ATA_SWDMA2 = ATA_SWDMA1 | (1 << 2), ATA_SWDMA2_ONLY = (1 << 2), ATA_MWDMA0 = (1 << 0), ATA_MWDMA1 = ATA_MWDMA0 | (1 << 1), ATA_MWDMA2 = ATA_MWDMA1 | (1 << 2), ATA_MWDMA3 = ATA_MWDMA2 | (1 << 3), ATA_MWDMA4 = ATA_MWDMA3 | (1 << 4), ATA_MWDMA12_ONLY = (1 << 1) | (1 << 2), ATA_MWDMA2_ONLY = (1 << 2), ATA_UDMA0 = (1 << 0), ATA_UDMA1 = ATA_UDMA0 | (1 << 1), ATA_UDMA2 = ATA_UDMA1 | (1 << 2), ATA_UDMA3 = ATA_UDMA2 | (1 << 3), ATA_UDMA4 = ATA_UDMA3 | (1 << 4), ATA_UDMA5 = ATA_UDMA4 | (1 << 5), ATA_UDMA6 = ATA_UDMA5 | (1 << 6), ATA_UDMA7 = ATA_UDMA6 | (1 << 7), /* ATA_UDMA7 is just for completeness... doesn't exist (yet?). */ ATA_UDMA24_ONLY = (1 << 2) | (1 << 4), ATA_UDMA_MASK_40C = ATA_UDMA2, /* udma0-2 */ /* DMA-related */ ATA_PRD_SZ = 8, ATA_PRD_TBL_SZ = (ATA_MAX_PRD * ATA_PRD_SZ), ATA_PRD_EOT = (1 << 31), /* end-of-table flag */ ATA_DMA_TABLE_OFS = 4, ATA_DMA_STATUS = 2, ATA_DMA_CMD = 0, ATA_DMA_WR = (1 << 3), ATA_DMA_START = (1 << 0), ATA_DMA_INTR = (1 << 2), ATA_DMA_ERR = (1 << 1), ATA_DMA_ACTIVE = (1 << 0), /* bits in ATA command block registers */ ATA_HOB = (1 << 7), /* LBA48 selector */ ATA_NIEN = (1 << 1), /* disable-irq flag */ ATA_LBA = (1 << 6), /* LBA28 selector */ ATA_DEV1 = (1 << 4), /* Select Device 1 (slave) */ ATA_DEVICE_OBS = (1 << 7) | (1 << 5), /* obs bits in dev reg */ ATA_DEVCTL_OBS = (1 << 3), /* obsolete bit in devctl reg */ ATA_BUSY = (1 << 7), /* BSY status bit */ ATA_DRDY = (1 << 6), /* device ready */ ATA_DF = (1 << 5), /* device fault */ ATA_DSC = (1 << 4), /* drive seek complete */ ATA_DRQ = (1 << 3), /* data request i/o */ ATA_CORR = (1 << 2), /* corrected data error */ ATA_SENSE = (1 << 1), /* sense code available */ ATA_ERR = (1 << 0), /* have an error */ ATA_SRST = (1 << 2), /* software reset */ ATA_ICRC = (1 << 7), /* interface CRC error */ ATA_BBK = ATA_ICRC, /* pre-EIDE: block marked bad */ ATA_UNC = (1 << 6), /* uncorrectable media error */ ATA_MC = (1 << 5), /* media changed */ ATA_IDNF = (1 << 4), /* ID not found */ ATA_MCR = (1 << 3), /* media change requested */ ATA_ABORTED = (1 << 2), /* command aborted */ ATA_TRK0NF = (1 << 1), /* track 0 not found */ ATA_AMNF = (1 << 0), /* address mark not found */ ATAPI_LFS = 0xF0, /* last failed sense */ ATAPI_EOM = ATA_TRK0NF, /* end of media */ ATAPI_ILI = ATA_AMNF, /* illegal length indication */ ATAPI_IO = (1 << 1), ATAPI_COD = (1 << 0), /* ATA command block registers */ ATA_REG_DATA = 0x00, ATA_REG_ERR = 0x01, ATA_REG_NSECT = 0x02, ATA_REG_LBAL = 0x03, ATA_REG_LBAM = 0x04, ATA_REG_LBAH = 0x05, ATA_REG_DEVICE = 0x06, ATA_REG_STATUS = 0x07, ATA_REG_FEATURE = ATA_REG_ERR, /* and their aliases */ ATA_REG_CMD = ATA_REG_STATUS, ATA_REG_BYTEL = ATA_REG_LBAM, ATA_REG_BYTEH = ATA_REG_LBAH, ATA_REG_DEVSEL = ATA_REG_DEVICE, ATA_REG_IRQ = ATA_REG_NSECT, /* ATA device commands */ ATA_CMD_DEV_RESET = 0x08, /* ATAPI device reset */ ATA_CMD_CHK_POWER = 0xE5, /* check power mode */ ATA_CMD_STANDBY = 0xE2, /* place in standby power mode */ ATA_CMD_IDLE = 0xE3, /* place in idle power mode */ ATA_CMD_EDD = 0x90, /* execute device diagnostic */ ATA_CMD_DOWNLOAD_MICRO = 0x92, ATA_CMD_DOWNLOAD_MICRO_DMA = 0x93, ATA_CMD_NOP = 0x00, ATA_CMD_FLUSH = 0xE7, ATA_CMD_FLUSH_EXT = 0xEA, ATA_CMD_ID_ATA = 0xEC, ATA_CMD_ID_ATAPI = 0xA1, ATA_CMD_SERVICE = 0xA2, ATA_CMD_READ = 0xC8, ATA_CMD_READ_EXT = 0x25, ATA_CMD_READ_QUEUED = 0x26, ATA_CMD_READ_STREAM_EXT = 0x2B, ATA_CMD_READ_STREAM_DMA_EXT = 0x2A, ATA_CMD_WRITE = 0xCA, ATA_CMD_WRITE_EXT = 0x35, ATA_CMD_WRITE_QUEUED = 0x36, ATA_CMD_WRITE_STREAM_EXT = 0x3B, ATA_CMD_WRITE_STREAM_DMA_EXT = 0x3A, ATA_CMD_WRITE_FUA_EXT = 0x3D, ATA_CMD_WRITE_QUEUED_FUA_EXT = 0x3E, ATA_CMD_FPDMA_READ = 0x60, ATA_CMD_FPDMA_WRITE = 0x61, ATA_CMD_NCQ_NON_DATA = 0x63, ATA_CMD_FPDMA_SEND = 0x64, ATA_CMD_FPDMA_RECV = 0x65, ATA_CMD_PIO_READ = 0x20, ATA_CMD_PIO_READ_EXT = 0x24, ATA_CMD_PIO_WRITE = 0x30, ATA_CMD_PIO_WRITE_EXT = 0x34, ATA_CMD_READ_MULTI = 0xC4, ATA_CMD_READ_MULTI_EXT = 0x29, ATA_CMD_WRITE_MULTI = 0xC5, ATA_CMD_WRITE_MULTI_EXT = 0x39, ATA_CMD_WRITE_MULTI_FUA_EXT = 0xCE, ATA_CMD_SET_FEATURES = 0xEF, ATA_CMD_SET_MULTI = 0xC6, ATA_CMD_PACKET = 0xA0, ATA_CMD_VERIFY = 0x40, ATA_CMD_VERIFY_EXT = 0x42, ATA_CMD_WRITE_UNCORR_EXT = 0x45, ATA_CMD_STANDBYNOW1 = 0xE0, ATA_CMD_IDLEIMMEDIATE = 0xE1, ATA_CMD_SLEEP = 0xE6, ATA_CMD_INIT_DEV_PARAMS = 0x91, ATA_CMD_READ_NATIVE_MAX = 0xF8, ATA_CMD_READ_NATIVE_MAX_EXT = 0x27, ATA_CMD_SET_MAX = 0xF9, ATA_CMD_SET_MAX_EXT = 0x37, ATA_CMD_READ_LOG_EXT = 0x2F, ATA_CMD_WRITE_LOG_EXT = 0x3F, ATA_CMD_READ_LOG_DMA_EXT = 0x47, ATA_CMD_WRITE_LOG_DMA_EXT = 0x57, ATA_CMD_TRUSTED_NONDATA = 0x5B, ATA_CMD_TRUSTED_RCV = 0x5C, ATA_CMD_TRUSTED_RCV_DMA = 0x5D, ATA_CMD_TRUSTED_SND = 0x5E, ATA_CMD_TRUSTED_SND_DMA = 0x5F, ATA_CMD_PMP_READ = 0xE4, ATA_CMD_PMP_READ_DMA = 0xE9, ATA_CMD_PMP_WRITE = 0xE8, ATA_CMD_PMP_WRITE_DMA = 0xEB, ATA_CMD_CONF_OVERLAY = 0xB1, ATA_CMD_SEC_SET_PASS = 0xF1, ATA_CMD_SEC_UNLOCK = 0xF2, ATA_CMD_SEC_ERASE_PREP = 0xF3, ATA_CMD_SEC_ERASE_UNIT = 0xF4, ATA_CMD_SEC_FREEZE_LOCK = 0xF5, ATA_CMD_SEC_DISABLE_PASS = 0xF6, ATA_CMD_CONFIG_STREAM = 0x51, ATA_CMD_SMART = 0xB0, ATA_CMD_MEDIA_LOCK = 0xDE, ATA_CMD_MEDIA_UNLOCK = 0xDF, ATA_CMD_DSM = 0x06, ATA_CMD_CHK_MED_CRD_TYP = 0xD1, ATA_CMD_CFA_REQ_EXT_ERR = 0x03, ATA_CMD_CFA_WRITE_NE = 0x38, ATA_CMD_CFA_TRANS_SECT = 0x87, ATA_CMD_CFA_ERASE = 0xC0, ATA_CMD_CFA_WRITE_MULT_NE = 0xCD, ATA_CMD_REQ_SENSE_DATA = 0x0B, ATA_CMD_SANITIZE_DEVICE = 0xB4, ATA_CMD_ZAC_MGMT_IN = 0x4A, ATA_CMD_ZAC_MGMT_OUT = 0x9F, /* marked obsolete in the ATA/ATAPI-7 spec */ ATA_CMD_RESTORE = 0x10, /* Subcmds for ATA_CMD_FPDMA_RECV */ ATA_SUBCMD_FPDMA_RECV_RD_LOG_DMA_EXT = 0x01, ATA_SUBCMD_FPDMA_RECV_ZAC_MGMT_IN = 0x02, /* Subcmds for ATA_CMD_FPDMA_SEND */ ATA_SUBCMD_FPDMA_SEND_DSM = 0x00, ATA_SUBCMD_FPDMA_SEND_WR_LOG_DMA_EXT = 0x02, /* Subcmds for ATA_CMD_NCQ_NON_DATA */ ATA_SUBCMD_NCQ_NON_DATA_ABORT_QUEUE = 0x00, ATA_SUBCMD_NCQ_NON_DATA_SET_FEATURES = 0x05, ATA_SUBCMD_NCQ_NON_DATA_ZERO_EXT = 0x06, ATA_SUBCMD_NCQ_NON_DATA_ZAC_MGMT_OUT = 0x07, /* Subcmds for ATA_CMD_ZAC_MGMT_IN */ ATA_SUBCMD_ZAC_MGMT_IN_REPORT_ZONES = 0x00, /* Subcmds for ATA_CMD_ZAC_MGMT_OUT */ ATA_SUBCMD_ZAC_MGMT_OUT_CLOSE_ZONE = 0x01, ATA_SUBCMD_ZAC_MGMT_OUT_FINISH_ZONE = 0x02, ATA_SUBCMD_ZAC_MGMT_OUT_OPEN_ZONE = 0x03, ATA_SUBCMD_ZAC_MGMT_OUT_RESET_WRITE_POINTER = 0x04, /* READ_LOG_EXT pages */ ATA_LOG_DIRECTORY = 0x0, ATA_LOG_SATA_NCQ = 0x10, ATA_LOG_NCQ_NON_DATA = 0x12, ATA_LOG_NCQ_SEND_RECV = 0x13, ATA_LOG_IDENTIFY_DEVICE = 0x30, /* Identify device log pages: */ ATA_LOG_SECURITY = 0x06, ATA_LOG_SATA_SETTINGS = 0x08, ATA_LOG_ZONED_INFORMATION = 0x09, /* Identify device SATA settings log:*/ ATA_LOG_DEVSLP_OFFSET = 0x30, ATA_LOG_DEVSLP_SIZE = 0x08, ATA_LOG_DEVSLP_MDAT = 0x00, ATA_LOG_DEVSLP_MDAT_MASK = 0x1F, ATA_LOG_DEVSLP_DETO = 0x01, ATA_LOG_DEVSLP_VALID = 0x07, ATA_LOG_DEVSLP_VALID_MASK = 0x80, ATA_LOG_NCQ_PRIO_OFFSET = 0x09, /* NCQ send and receive log */ ATA_LOG_NCQ_SEND_RECV_SUBCMDS_OFFSET = 0x00, ATA_LOG_NCQ_SEND_RECV_SUBCMDS_DSM = (1 << 0), ATA_LOG_NCQ_SEND_RECV_DSM_OFFSET = 0x04, ATA_LOG_NCQ_SEND_RECV_DSM_TRIM = (1 << 0), ATA_LOG_NCQ_SEND_RECV_RD_LOG_OFFSET = 0x08, ATA_LOG_NCQ_SEND_RECV_RD_LOG_SUPPORTED = (1 << 0), ATA_LOG_NCQ_SEND_RECV_WR_LOG_OFFSET = 0x0C, ATA_LOG_NCQ_SEND_RECV_WR_LOG_SUPPORTED = (1 << 0), ATA_LOG_NCQ_SEND_RECV_ZAC_MGMT_OFFSET = 0x10, ATA_LOG_NCQ_SEND_RECV_ZAC_MGMT_OUT_SUPPORTED = (1 << 0), ATA_LOG_NCQ_SEND_RECV_ZAC_MGMT_IN_SUPPORTED = (1 << 1), ATA_LOG_NCQ_SEND_RECV_SIZE = 0x14, /* NCQ Non-Data log */ ATA_LOG_NCQ_NON_DATA_SUBCMDS_OFFSET = 0x00, ATA_LOG_NCQ_NON_DATA_ABORT_OFFSET = 0x00, ATA_LOG_NCQ_NON_DATA_ABORT_NCQ = (1 << 0), ATA_LOG_NCQ_NON_DATA_ABORT_ALL = (1 << 1), ATA_LOG_NCQ_NON_DATA_ABORT_STREAMING = (1 << 2), ATA_LOG_NCQ_NON_DATA_ABORT_NON_STREAMING = (1 << 3), ATA_LOG_NCQ_NON_DATA_ABORT_SELECTED = (1 << 4), ATA_LOG_NCQ_NON_DATA_ZAC_MGMT_OFFSET = 0x1C, ATA_LOG_NCQ_NON_DATA_ZAC_MGMT_OUT = (1 << 0), ATA_LOG_NCQ_NON_DATA_SIZE = 0x40, /* READ/WRITE LONG (obsolete) */ ATA_CMD_READ_LONG = 0x22, ATA_CMD_READ_LONG_ONCE = 0x23, ATA_CMD_WRITE_LONG = 0x32, ATA_CMD_WRITE_LONG_ONCE = 0x33, /* SETFEATURES stuff */ SETFEATURES_XFER = 0x03, XFER_UDMA_7 = 0x47, XFER_UDMA_6 = 0x46, XFER_UDMA_5 = 0x45, XFER_UDMA_4 = 0x44, XFER_UDMA_3 = 0x43, XFER_UDMA_2 = 0x42, XFER_UDMA_1 = 0x41, XFER_UDMA_0 = 0x40, XFER_MW_DMA_4 = 0x24, /* CFA only */ XFER_MW_DMA_3 = 0x23, /* CFA only */ XFER_MW_DMA_2 = 0x22, XFER_MW_DMA_1 = 0x21, XFER_MW_DMA_0 = 0x20, XFER_SW_DMA_2 = 0x12, XFER_SW_DMA_1 = 0x11, XFER_SW_DMA_0 = 0x10, XFER_PIO_6 = 0x0E, /* CFA only */ XFER_PIO_5 = 0x0D, /* CFA only */ XFER_PIO_4 = 0x0C, XFER_PIO_3 = 0x0B, XFER_PIO_2 = 0x0A, XFER_PIO_1 = 0x09, XFER_PIO_0 = 0x08, XFER_PIO_SLOW = 0x00, SETFEATURES_WC_ON = 0x02, /* Enable write cache */ SETFEATURES_WC_OFF = 0x82, /* Disable write cache */ SETFEATURES_RA_ON = 0xaa, /* Enable read look-ahead */ SETFEATURES_RA_OFF = 0x55, /* Disable read look-ahead */ /* Enable/Disable Automatic Acoustic Management */ SETFEATURES_AAM_ON = 0x42, SETFEATURES_AAM_OFF = 0xC2, SETFEATURES_SPINUP = 0x07, /* Spin-up drive */ SETFEATURES_SPINUP_TIMEOUT = 30000, /* 30s timeout for drive spin-up from PUIS */ SETFEATURES_SATA_ENABLE = 0x10, /* Enable use of SATA feature */ SETFEATURES_SATA_DISABLE = 0x90, /* Disable use of SATA feature */ /* SETFEATURE Sector counts for SATA features */ SATA_FPDMA_OFFSET = 0x01, /* FPDMA non-zero buffer offsets */ SATA_FPDMA_AA = 0x02, /* FPDMA Setup FIS Auto-Activate */ SATA_DIPM = 0x03, /* Device Initiated Power Management */ SATA_FPDMA_IN_ORDER = 0x04, /* FPDMA in-order data delivery */ SATA_AN = 0x05, /* Asynchronous Notification */ SATA_SSP = 0x06, /* Software Settings Preservation */ SATA_DEVSLP = 0x09, /* Device Sleep */ SETFEATURE_SENSE_DATA = 0xC3, /* Sense Data Reporting feature */ /* feature values for SET_MAX */ ATA_SET_MAX_ADDR = 0x00, ATA_SET_MAX_PASSWD = 0x01, ATA_SET_MAX_LOCK = 0x02, ATA_SET_MAX_UNLOCK = 0x03, ATA_SET_MAX_FREEZE_LOCK = 0x04, ATA_SET_MAX_PASSWD_DMA = 0x05, ATA_SET_MAX_UNLOCK_DMA = 0x06, /* feature values for DEVICE CONFIGURATION OVERLAY */ ATA_DCO_RESTORE = 0xC0, ATA_DCO_FREEZE_LOCK = 0xC1, ATA_DCO_IDENTIFY = 0xC2, ATA_DCO_SET = 0xC3, /* feature values for SMART */ ATA_SMART_ENABLE = 0xD8, ATA_SMART_READ_VALUES = 0xD0, ATA_SMART_READ_THRESHOLDS = 0xD1, /* feature values for Data Set Management */ ATA_DSM_TRIM = 0x01, /* password used in LBA Mid / LBA High for executing SMART commands */ ATA_SMART_LBAM_PASS = 0x4F, ATA_SMART_LBAH_PASS = 0xC2, /* ATAPI stuff */ ATAPI_PKT_DMA = (1 << 0), ATAPI_DMADIR = (1 << 2), /* ATAPI data dir: 0=to device, 1=to host */ ATAPI_CDB_LEN = 16, /* PMP stuff */ SATA_PMP_MAX_PORTS = 15, SATA_PMP_CTRL_PORT = 15, SATA_PMP_GSCR_DWORDS = 128, SATA_PMP_GSCR_PROD_ID = 0, SATA_PMP_GSCR_REV = 1, SATA_PMP_GSCR_PORT_INFO = 2, SATA_PMP_GSCR_ERROR = 32, SATA_PMP_GSCR_ERROR_EN = 33, SATA_PMP_GSCR_FEAT = 64, SATA_PMP_GSCR_FEAT_EN = 96, SATA_PMP_PSCR_STATUS = 0, SATA_PMP_PSCR_ERROR = 1, SATA_PMP_PSCR_CONTROL = 2, SATA_PMP_FEAT_BIST = (1 << 0), SATA_PMP_FEAT_PMREQ = (1 << 1), SATA_PMP_FEAT_DYNSSC = (1 << 2), SATA_PMP_FEAT_NOTIFY = (1 << 3), /* cable types */ ATA_CBL_NONE = 0, ATA_CBL_PATA40 = 1, ATA_CBL_PATA80 = 2, ATA_CBL_PATA40_SHORT = 3, /* 40 wire cable to high UDMA spec */ ATA_CBL_PATA_UNK = 4, /* don't know, maybe 80c? */ ATA_CBL_PATA_IGN = 5, /* don't know, ignore cable handling */ ATA_CBL_SATA = 6, /* SATA Status and Control Registers */ SCR_STATUS = 0, SCR_ERROR = 1, SCR_CONTROL = 2, SCR_ACTIVE = 3, SCR_NOTIFICATION = 4, /* SError bits */ SERR_DATA_RECOVERED = (1 << 0), /* recovered data error */ SERR_COMM_RECOVERED = (1 << 1), /* recovered comm failure */ SERR_DATA = (1 << 8), /* unrecovered data error */ SERR_PERSISTENT = (1 << 9), /* persistent data/comm error */ SERR_PROTOCOL = (1 << 10), /* protocol violation */ SERR_INTERNAL = (1 << 11), /* host internal error */ SERR_PHYRDY_CHG = (1 << 16), /* PHY RDY changed */ SERR_PHY_INT_ERR = (1 << 17), /* PHY internal error */ SERR_COMM_WAKE = (1 << 18), /* Comm wake */ SERR_10B_8B_ERR = (1 << 19), /* 10b to 8b decode error */ SERR_DISPARITY = (1 << 20), /* Disparity */ SERR_CRC = (1 << 21), /* CRC error */ SERR_HANDSHAKE = (1 << 22), /* Handshake error */ SERR_LINK_SEQ_ERR = (1 << 23), /* Link sequence error */ SERR_TRANS_ST_ERROR = (1 << 24), /* Transport state trans. error */ SERR_UNRECOG_FIS = (1 << 25), /* Unrecognized FIS */ SERR_DEV_XCHG = (1 << 26), /* device exchanged */ }; enum ata_prot_flags { /* protocol flags */ ATA_PROT_FLAG_PIO = (1 << 0), /* is PIO */ ATA_PROT_FLAG_DMA = (1 << 1), /* is DMA */ ATA_PROT_FLAG_NCQ = (1 << 2), /* is NCQ */ ATA_PROT_FLAG_ATAPI = (1 << 3), /* is ATAPI */ /* taskfile protocols */ ATA_PROT_UNKNOWN = (u8)-1, ATA_PROT_NODATA = 0, ATA_PROT_PIO = ATA_PROT_FLAG_PIO, ATA_PROT_DMA = ATA_PROT_FLAG_DMA, ATA_PROT_NCQ_NODATA = ATA_PROT_FLAG_NCQ, ATA_PROT_NCQ = ATA_PROT_FLAG_DMA | ATA_PROT_FLAG_NCQ, ATAPI_PROT_NODATA = ATA_PROT_FLAG_ATAPI, ATAPI_PROT_PIO = ATA_PROT_FLAG_ATAPI | ATA_PROT_FLAG_PIO, ATAPI_PROT_DMA = ATA_PROT_FLAG_ATAPI | ATA_PROT_FLAG_DMA, }; enum ata_ioctls { ATA_IOC_GET_IO32 = 0x309, /* HDIO_GET_32BIT */ ATA_IOC_SET_IO32 = 0x324, /* HDIO_SET_32BIT */ }; /* core structures */ struct ata_bmdma_prd { __le32 addr; __le32 flags_len; }; /* * id tests */ #define ata_id_is_ata(id) (((id)[ATA_ID_CONFIG] & (1 << 15)) == 0) #define ata_id_has_lba(id) ((id)[ATA_ID_CAPABILITY] & (1 << 9)) #define ata_id_has_dma(id) ((id)[ATA_ID_CAPABILITY] & (1 << 8)) #define ata_id_has_ncq(id) ((id)[ATA_ID_SATA_CAPABILITY] & (1 << 8)) #define ata_id_queue_depth(id) (((id)[ATA_ID_QUEUE_DEPTH] & 0x1f) + 1) #define ata_id_removable(id) ((id)[ATA_ID_CONFIG] & (1 << 7)) #define ata_id_has_atapi_AN(id) \ ((((id)[ATA_ID_SATA_CAPABILITY] != 0x0000) && \ ((id)[ATA_ID_SATA_CAPABILITY] != 0xffff)) && \ ((id)[ATA_ID_FEATURE_SUPP] & (1 << 5))) #define ata_id_has_fpdma_aa(id) \ ((((id)[ATA_ID_SATA_CAPABILITY] != 0x0000) && \ ((id)[ATA_ID_SATA_CAPABILITY] != 0xffff)) && \ ((id)[ATA_ID_FEATURE_SUPP] & (1 << 2))) #define ata_id_iordy_disable(id) ((id)[ATA_ID_CAPABILITY] & (1 << 10)) #define ata_id_has_iordy(id) ((id)[ATA_ID_CAPABILITY] & (1 << 11)) #define ata_id_u32(id,n) \ (((u32) (id)[(n) + 1] << 16) | ((u32) (id)[(n)])) #define ata_id_u64(id,n) \ ( ((u64) (id)[(n) + 3] << 48) | \ ((u64) (id)[(n) + 2] << 32) | \ ((u64) (id)[(n) + 1] << 16) | \ ((u64) (id)[(n) + 0]) ) #define ata_id_cdb_intr(id) (((id)[ATA_ID_CONFIG] & 0x60) == 0x20) #define ata_id_has_da(id) ((id)[ATA_ID_SATA_CAPABILITY_2] & (1 << 4)) #define ata_id_has_devslp(id) ((id)[ATA_ID_FEATURE_SUPP] & (1 << 8)) #define ata_id_has_ncq_autosense(id) \ ((id)[ATA_ID_FEATURE_SUPP] & (1 << 7)) static inline bool ata_id_has_hipm(const u16 *id) { u16 val = id[ATA_ID_SATA_CAPABILITY]; if (val == 0 || val == 0xffff) return false; return val & (1 << 9); } static inline bool ata_id_has_dipm(const u16 *id) { u16 val = id[ATA_ID_FEATURE_SUPP]; if (val == 0 || val == 0xffff) return false; return val & (1 << 3); } static inline bool ata_id_has_fua(const u16 *id) { if ((id[ATA_ID_CFSSE] & 0xC000) != 0x4000) return false; return id[ATA_ID_CFSSE] & (1 << 6); } static inline bool ata_id_has_flush(const u16 *id) { if ((id[ATA_ID_COMMAND_SET_2] & 0xC000) != 0x4000) return false; return id[ATA_ID_COMMAND_SET_2] & (1 << 12); } static inline bool ata_id_flush_enabled(const u16 *id) { if (ata_id_has_flush(id) == 0) return false; if ((id[ATA_ID_CSF_DEFAULT] & 0xC000) != 0x4000) return false; return id[ATA_ID_CFS_ENABLE_2] & (1 << 12); } static inline bool ata_id_has_flush_ext(const u16 *id) { if ((id[ATA_ID_COMMAND_SET_2] & 0xC000) != 0x4000) return false; return id[ATA_ID_COMMAND_SET_2] & (1 << 13); } static inline bool ata_id_flush_ext_enabled(const u16 *id) { if (ata_id_has_flush_ext(id) == 0) return false; if ((id[ATA_ID_CSF_DEFAULT] & 0xC000) != 0x4000) return false; /* * some Maxtor disks have bit 13 defined incorrectly * so check bit 10 too */ return (id[ATA_ID_CFS_ENABLE_2] & 0x2400) == 0x2400; } static inline u32 ata_id_logical_sector_size(const u16 *id) { /* T13/1699-D Revision 6a, Sep 6, 2008. Page 128. * IDENTIFY DEVICE data, word 117-118. * 0xd000 ignores bit 13 (logical:physical > 1) */ if ((id[ATA_ID_SECTOR_SIZE] & 0xd000) == 0x5000) return (((id[ATA_ID_LOGICAL_SECTOR_SIZE+1] << 16) + id[ATA_ID_LOGICAL_SECTOR_SIZE]) * sizeof(u16)) ; return ATA_SECT_SIZE; } static inline u8 ata_id_log2_per_physical_sector(const u16 *id) { /* T13/1699-D Revision 6a, Sep 6, 2008. Page 128. * IDENTIFY DEVICE data, word 106. * 0xe000 ignores bit 12 (logical sector > 512 bytes) */ if ((id[ATA_ID_SECTOR_SIZE] & 0xe000) == 0x6000) return (id[ATA_ID_SECTOR_SIZE] & 0xf); return 0; } /* Offset of logical sectors relative to physical sectors. * * If device has more than one logical sector per physical sector * (aka 512 byte emulation), vendors might offset the "sector 0" address * so sector 63 is "naturally aligned" - e.g. FAT partition table. * This avoids Read/Mod/Write penalties when using FAT partition table * and updating "well aligned" (FS perspective) physical sectors on every * transaction. */ static inline u16 ata_id_logical_sector_offset(const u16 *id, u8 log2_per_phys) { u16 word_209 = id[209]; if ((log2_per_phys > 1) && (word_209 & 0xc000) == 0x4000) { u16 first = word_209 & 0x3fff; if (first > 0) return (1 << log2_per_phys) - first; } return 0; } static inline bool ata_id_has_lba48(const u16 *id) { if ((id[ATA_ID_COMMAND_SET_2] & 0xC000) != 0x4000) return false; if (!ata_id_u64(id, ATA_ID_LBA_CAPACITY_2)) return false; return id[ATA_ID_COMMAND_SET_2] & (1 << 10); } static inline bool ata_id_lba48_enabled(const u16 *id) { if (ata_id_has_lba48(id) == 0) return false; if ((id[ATA_ID_CSF_DEFAULT] & 0xC000) != 0x4000) return false; return id[ATA_ID_CFS_ENABLE_2] & (1 << 10); } static inline bool ata_id_hpa_enabled(const u16 *id) { /* Yes children, word 83 valid bits cover word 82 data */ if ((id[ATA_ID_COMMAND_SET_2] & 0xC000) != 0x4000) return false; /* And 87 covers 85-87 */ if ((id[ATA_ID_CSF_DEFAULT] & 0xC000) != 0x4000) return false; /* Check command sets enabled as well as supported */ if ((id[ATA_ID_CFS_ENABLE_1] & (1 << 10)) == 0) return false; return id[ATA_ID_COMMAND_SET_1] & (1 << 10); } static inline bool ata_id_has_wcache(const u16 *id) { /* Yes children, word 83 valid bits cover word 82 data */ if ((id[ATA_ID_COMMAND_SET_2] & 0xC000) != 0x4000) return false; return id[ATA_ID_COMMAND_SET_1] & (1 << 5); } static inline bool ata_id_has_pm(const u16 *id) { if ((id[ATA_ID_COMMAND_SET_2] & 0xC000) != 0x4000) return false; return id[ATA_ID_COMMAND_SET_1] & (1 << 3); } static inline bool ata_id_rahead_enabled(const u16 *id) { if ((id[ATA_ID_CSF_DEFAULT] & 0xC000) != 0x4000) return false; return id[ATA_ID_CFS_ENABLE_1] & (1 << 6); } static inline bool ata_id_wcache_enabled(const u16 *id) { if ((id[ATA_ID_CSF_DEFAULT] & 0xC000) != 0x4000) return false; return id[ATA_ID_CFS_ENABLE_1] & (1 << 5); } static inline bool ata_id_has_read_log_dma_ext(const u16 *id) { /* Word 86 must have bit 15 set */ if (!(id[ATA_ID_CFS_ENABLE_2] & (1 << 15))) return false; /* READ LOG DMA EXT support can be signaled either from word 119 * or from word 120. The format is the same for both words: Bit * 15 must be cleared, bit 14 set and bit 3 set. */ if ((id[ATA_ID_COMMAND_SET_3] & 0xC008) == 0x4008 || (id[ATA_ID_COMMAND_SET_4] & 0xC008) == 0x4008) return true; return false; } static inline bool ata_id_has_sense_reporting(const u16 *id) { if (!(id[ATA_ID_CFS_ENABLE_2] & (1 << 15))) return false; return id[ATA_ID_COMMAND_SET_3] & (1 << 6); } static inline bool ata_id_sense_reporting_enabled(const u16 *id) { if (!(id[ATA_ID_CFS_ENABLE_2] & (1 << 15))) return false; return id[ATA_ID_COMMAND_SET_4] & (1 << 6); } /** * * Word: 206 - SCT Command Transport * 15:12 - Vendor Specific * 11:6 - Reserved * 5 - SCT Command Transport Data Tables supported * 4 - SCT Command Transport Features Control supported * 3 - SCT Command Transport Error Recovery Control supported * 2 - SCT Command Transport Write Same supported * 1 - SCT Command Transport Long Sector Access supported * 0 - SCT Command Transport supported */ static inline bool ata_id_sct_data_tables(const u16 *id) { return id[ATA_ID_SCT_CMD_XPORT] & (1 << 5) ? true : false; } static inline bool ata_id_sct_features_ctrl(const u16 *id) { return id[ATA_ID_SCT_CMD_XPORT] & (1 << 4) ? true : false; } static inline bool ata_id_sct_error_recovery_ctrl(const u16 *id) { return id[ATA_ID_SCT_CMD_XPORT] & (1 << 3) ? true : false; } static inline bool ata_id_sct_long_sector_access(const u16 *id) { return id[ATA_ID_SCT_CMD_XPORT] & (1 << 1) ? true : false; } static inline bool ata_id_sct_supported(const u16 *id) { return id[ATA_ID_SCT_CMD_XPORT] & (1 << 0) ? true : false; } /** * ata_id_major_version - get ATA level of drive * @id: Identify data * * Caveats: * ATA-1 considers identify optional * ATA-2 introduces mandatory identify * ATA-3 introduces word 80 and accurate reporting * * The practical impact of this is that ata_id_major_version cannot * reliably report on drives below ATA3. */ static inline unsigned int ata_id_major_version(const u16 *id) { unsigned int mver; if (id[ATA_ID_MAJOR_VER] == 0xFFFF) return 0; for (mver = 14; mver >= 1; mver--) if (id[ATA_ID_MAJOR_VER] & (1 << mver)) break; return mver; } static inline bool ata_id_is_sata(const u16 *id) { /* * See if word 93 is 0 AND drive is at least ATA-5 compatible * verifying that word 80 by casting it to a signed type -- * this trick allows us to filter out the reserved values of * 0x0000 and 0xffff along with the earlier ATA revisions... */ if (id[ATA_ID_HW_CONFIG] == 0 && (short)id[ATA_ID_MAJOR_VER] >= 0x0020) return true; return false; } static inline bool ata_id_has_tpm(const u16 *id) { /* The TPM bits are only valid on ATA8 */ if (ata_id_major_version(id) < 8) return false; if ((id[48] & 0xC000) != 0x4000) return false; return id[48] & (1 << 0); } static inline bool ata_id_has_dword_io(const u16 *id) { /* ATA 8 reuses this flag for "trusted" computing */ if (ata_id_major_version(id) > 7) return false; return id[ATA_ID_DWORD_IO] & (1 << 0); } static inline bool ata_id_has_trusted(const u16 *id) { if (ata_id_major_version(id) <= 7) return false; return id[ATA_ID_TRUSTED] & (1 << 0); } static inline bool ata_id_has_unload(const u16 *id) { if (ata_id_major_version(id) >= 7 && (id[ATA_ID_CFSSE] & 0xC000) == 0x4000 && id[ATA_ID_CFSSE] & (1 << 13)) return true; return false; } static inline bool ata_id_has_wwn(const u16 *id) { return (id[ATA_ID_CSF_DEFAULT] & 0xC100) == 0x4100; } static inline int ata_id_form_factor(const u16 *id) { u16 val = id[168]; if (ata_id_major_version(id) < 7 || val == 0 || val == 0xffff) return 0; val &= 0xf; if (val > 5) return 0; return val; } static inline int ata_id_rotation_rate(const u16 *id) { u16 val = id[217]; if (ata_id_major_version(id) < 7 || val == 0 || val == 0xffff) return 0; if (val > 1 && val < 0x401) return 0; return val; } static inline bool ata_id_has_ncq_send_and_recv(const u16 *id) { return id[ATA_ID_SATA_CAPABILITY_2] & BIT(6); } static inline bool ata_id_has_ncq_non_data(const u16 *id) { return id[ATA_ID_SATA_CAPABILITY_2] & BIT(5); } static inline bool ata_id_has_ncq_prio(const u16 *id) { return id[ATA_ID_SATA_CAPABILITY] & BIT(12); } static inline bool ata_id_has_trim(const u16 *id) { if (ata_id_major_version(id) >= 7 && (id[ATA_ID_DATA_SET_MGMT] & 1)) return true; return false; } static inline bool ata_id_has_zero_after_trim(const u16 *id) { /* DSM supported, deterministic read, and read zero after trim set */ if (ata_id_has_trim(id) && (id[ATA_ID_ADDITIONAL_SUPP] & 0x4020) == 0x4020) return true; return false; } static inline bool ata_id_current_chs_valid(const u16 *id) { /* For ATA-1 devices, if the INITIALIZE DEVICE PARAMETERS command has not been issued to the device then the values of id[ATA_ID_CUR_CYLS] to id[ATA_ID_CUR_SECTORS] are vendor specific. */ return (id[ATA_ID_FIELD_VALID] & 1) && /* Current translation valid */ id[ATA_ID_CUR_CYLS] && /* cylinders in current translation */ id[ATA_ID_CUR_HEADS] && /* heads in current translation */ id[ATA_ID_CUR_HEADS] <= 16 && id[ATA_ID_CUR_SECTORS]; /* sectors in current translation */ } static inline bool ata_id_is_cfa(const u16 *id) { if ((id[ATA_ID_CONFIG] == 0x848A) || /* Traditional CF */ (id[ATA_ID_CONFIG] == 0x844A)) /* Delkin Devices CF */ return true; /* * CF specs don't require specific value in the word 0 anymore and yet * they forbid to report the ATA version in the word 80 and require the * CFA feature set support to be indicated in the word 83 in this case. * Unfortunately, some cards only follow either of this requirements, * and while those that don't indicate CFA feature support need some * sort of quirk list, it seems impractical for the ones that do... */ return (id[ATA_ID_COMMAND_SET_2] & 0xC004) == 0x4004; } static inline bool ata_id_is_ssd(const u16 *id) { return id[ATA_ID_ROT_SPEED] == 0x01; } static inline u8 ata_id_zoned_cap(const u16 *id) { return (id[ATA_ID_ADDITIONAL_SUPP] & 0x3); } static inline bool ata_id_pio_need_iordy(const u16 *id, const u8 pio) { /* CF spec. r4.1 Table 22 says no IORDY on PIO5 and PIO6. */ if (pio > 4 && ata_id_is_cfa(id)) return false; /* For PIO3 and higher it is mandatory. */ if (pio > 2) return true; /* Turn it on when possible. */ return ata_id_has_iordy(id); } static inline bool ata_drive_40wire(const u16 *dev_id) { if (ata_id_is_sata(dev_id)) return false; /* SATA */ if ((dev_id[ATA_ID_HW_CONFIG] & 0xE000) == 0x6000) return false; /* 80 wire */ return true; } static inline bool ata_drive_40wire_relaxed(const u16 *dev_id) { if ((dev_id[ATA_ID_HW_CONFIG] & 0x2000) == 0x2000) return false; /* 80 wire */ return true; } static inline int atapi_cdb_len(const u16 *dev_id) { u16 tmp = dev_id[ATA_ID_CONFIG] & 0x3; switch (tmp) { case 0: return 12; case 1: return 16; default: return -1; } } static inline int atapi_command_packet_set(const u16 *dev_id) { return (dev_id[ATA_ID_CONFIG] >> 8) & 0x1f; } static inline bool atapi_id_dmadir(const u16 *dev_id) { return ata_id_major_version(dev_id) >= 7 && (dev_id[62] & 0x8000); } /* * ata_id_is_lba_capacity_ok() performs a sanity check on * the claimed LBA capacity value for the device. * * Returns 1 if LBA capacity looks sensible, 0 otherwise. * * It is called only once for each device. */ static inline bool ata_id_is_lba_capacity_ok(u16 *id) { unsigned long lba_sects, chs_sects, head, tail; /* No non-LBA info .. so valid! */ if (id[ATA_ID_CYLS] == 0) return true; lba_sects = ata_id_u32(id, ATA_ID_LBA_CAPACITY); /* * The ATA spec tells large drives to return * C/H/S = 16383/16/63 independent of their size. * Some drives can be jumpered to use 15 heads instead of 16. * Some drives can be jumpered to use 4092 cyls instead of 16383. */ if ((id[ATA_ID_CYLS] == 16383 || (id[ATA_ID_CYLS] == 4092 && id[ATA_ID_CUR_CYLS] == 16383)) && id[ATA_ID_SECTORS] == 63 && (id[ATA_ID_HEADS] == 15 || id[ATA_ID_HEADS] == 16) && (lba_sects >= 16383 * 63 * id[ATA_ID_HEADS])) return true; chs_sects = id[ATA_ID_CYLS] * id[ATA_ID_HEADS] * id[ATA_ID_SECTORS]; /* perform a rough sanity check on lba_sects: within 10% is OK */ if (lba_sects - chs_sects < chs_sects/10) return true; /* some drives have the word order reversed */ head = (lba_sects >> 16) & 0xffff; tail = lba_sects & 0xffff; lba_sects = head | (tail << 16); if (lba_sects - chs_sects < chs_sects/10) { *(__le32 *)&id[ATA_ID_LBA_CAPACITY] = __cpu_to_le32(lba_sects); return true; /* LBA capacity is (now) good */ } return false; /* LBA capacity value may be bad */ } static inline void ata_id_to_hd_driveid(u16 *id) { #ifdef __BIG_ENDIAN /* accessed in struct hd_driveid as 8-bit values */ id[ATA_ID_MAX_MULTSECT] = __cpu_to_le16(id[ATA_ID_MAX_MULTSECT]); id[ATA_ID_CAPABILITY] = __cpu_to_le16(id[ATA_ID_CAPABILITY]); id[ATA_ID_OLD_PIO_MODES] = __cpu_to_le16(id[ATA_ID_OLD_PIO_MODES]); id[ATA_ID_OLD_DMA_MODES] = __cpu_to_le16(id[ATA_ID_OLD_DMA_MODES]); id[ATA_ID_MULTSECT] = __cpu_to_le16(id[ATA_ID_MULTSECT]); /* as 32-bit values */ *(u32 *)&id[ATA_ID_LBA_CAPACITY] = ata_id_u32(id, ATA_ID_LBA_CAPACITY); *(u32 *)&id[ATA_ID_SPG] = ata_id_u32(id, ATA_ID_SPG); /* as 64-bit value */ *(u64 *)&id[ATA_ID_LBA_CAPACITY_2] = ata_id_u64(id, ATA_ID_LBA_CAPACITY_2); #endif } static inline bool ata_ok(u8 status) { return ((status & (ATA_BUSY | ATA_DRDY | ATA_DF | ATA_DRQ | ATA_ERR)) == ATA_DRDY); } static inline bool lba_28_ok(u64 block, u32 n_block) { /* check the ending block number: must be LESS THAN 0x0fffffff */ return ((block + n_block) < ((1 << 28) - 1)) && (n_block <= ATA_MAX_SECTORS); } static inline bool lba_48_ok(u64 block, u32 n_block) { /* check the ending block number */ return ((block + n_block - 1) < ((u64)1 << 48)) && (n_block <= ATA_MAX_SECTORS_LBA48); } #define sata_pmp_gscr_vendor(gscr) ((gscr)[SATA_PMP_GSCR_PROD_ID] & 0xffff) #define sata_pmp_gscr_devid(gscr) ((gscr)[SATA_PMP_GSCR_PROD_ID] >> 16) #define sata_pmp_gscr_rev(gscr) (((gscr)[SATA_PMP_GSCR_REV] >> 8) & 0xff) #define sata_pmp_gscr_ports(gscr) ((gscr)[SATA_PMP_GSCR_PORT_INFO] & 0xf) #endif /* __LINUX_ATA_H__ */
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 /* SPDX-License-Identifier: GPL-2.0 */ #undef TRACE_SYSTEM #define TRACE_SYSTEM 9p #if !defined(_TRACE_9P_H) || defined(TRACE_HEADER_MULTI_READ) #define _TRACE_9P_H #include <linux/tracepoint.h> #define P9_MSG_T \ EM( P9_TLERROR, "P9_TLERROR" ) \ EM( P9_RLERROR, "P9_RLERROR" ) \ EM( P9_TSTATFS, "P9_TSTATFS" ) \ EM( P9_RSTATFS, "P9_RSTATFS" ) \ EM( P9_TLOPEN, "P9_TLOPEN" ) \ EM( P9_RLOPEN, "P9_RLOPEN" ) \ EM( P9_TLCREATE, "P9_TLCREATE" ) \ EM( P9_RLCREATE, "P9_RLCREATE" ) \ EM( P9_TSYMLINK, "P9_TSYMLINK" ) \ EM( P9_RSYMLINK, "P9_RSYMLINK" ) \ EM( P9_TMKNOD, "P9_TMKNOD" ) \ EM( P9_RMKNOD, "P9_RMKNOD" ) \ EM( P9_TRENAME, "P9_TRENAME" ) \ EM( P9_RRENAME, "P9_RRENAME" ) \ EM( P9_TREADLINK, "P9_TREADLINK" ) \ EM( P9_RREADLINK, "P9_RREADLINK" ) \ EM( P9_TGETATTR, "P9_TGETATTR" ) \ EM( P9_RGETATTR, "P9_RGETATTR" ) \ EM( P9_TSETATTR, "P9_TSETATTR" ) \ EM( P9_RSETATTR, "P9_RSETATTR" ) \ EM( P9_TXATTRWALK, "P9_TXATTRWALK" ) \ EM( P9_RXATTRWALK, "P9_RXATTRWALK" ) \ EM( P9_TXATTRCREATE, "P9_TXATTRCREATE" ) \ EM( P9_RXATTRCREATE, "P9_RXATTRCREATE" ) \ EM( P9_TREADDIR, "P9_TREADDIR" ) \ EM( P9_RREADDIR, "P9_RREADDIR" ) \ EM( P9_TFSYNC, "P9_TFSYNC" ) \ EM( P9_RFSYNC, "P9_RFSYNC" ) \ EM( P9_TLOCK, "P9_TLOCK" ) \ EM( P9_RLOCK, "P9_RLOCK" ) \ EM( P9_TGETLOCK, "P9_TGETLOCK" ) \ EM( P9_RGETLOCK, "P9_RGETLOCK" ) \ EM( P9_TLINK, "P9_TLINK" ) \ EM( P9_RLINK, "P9_RLINK" ) \ EM( P9_TMKDIR, "P9_TMKDIR" ) \ EM( P9_RMKDIR, "P9_RMKDIR" ) \ EM( P9_TRENAMEAT, "P9_TRENAMEAT" ) \ EM( P9_RRENAMEAT, "P9_RRENAMEAT" ) \ EM( P9_TUNLINKAT, "P9_TUNLINKAT" ) \ EM( P9_RUNLINKAT, "P9_RUNLINKAT" ) \ EM( P9_TVERSION, "P9_TVERSION" ) \ EM( P9_RVERSION, "P9_RVERSION" ) \ EM( P9_TAUTH, "P9_TAUTH" ) \ EM( P9_RAUTH, "P9_RAUTH" ) \ EM( P9_TATTACH, "P9_TATTACH" ) \ EM( P9_RATTACH, "P9_RATTACH" ) \ EM( P9_TERROR, "P9_TERROR" ) \ EM( P9_RERROR, "P9_RERROR" ) \ EM( P9_TFLUSH, "P9_TFLUSH" ) \ EM( P9_RFLUSH, "P9_RFLUSH" ) \ EM( P9_TWALK, "P9_TWALK" ) \ EM( P9_RWALK, "P9_RWALK" ) \ EM( P9_TOPEN, "P9_TOPEN" ) \ EM( P9_ROPEN, "P9_ROPEN" ) \ EM( P9_TCREATE, "P9_TCREATE" ) \ EM( P9_RCREATE, "P9_RCREATE" ) \ EM( P9_TREAD, "P9_TREAD" ) \ EM( P9_RREAD, "P9_RREAD" ) \ EM( P9_TWRITE, "P9_TWRITE" ) \ EM( P9_RWRITE, "P9_RWRITE" ) \ EM( P9_TCLUNK, "P9_TCLUNK" ) \ EM( P9_RCLUNK, "P9_RCLUNK" ) \ EM( P9_TREMOVE, "P9_TREMOVE" ) \ EM( P9_RREMOVE, "P9_RREMOVE" ) \ EM( P9_TSTAT, "P9_TSTAT" ) \ EM( P9_RSTAT, "P9_RSTAT" ) \ EM( P9_TWSTAT, "P9_TWSTAT" ) \ EMe(P9_RWSTAT, "P9_RWSTAT" ) /* Define EM() to export the enums to userspace via TRACE_DEFINE_ENUM() */ #undef EM #undef EMe #define EM(a, b) TRACE_DEFINE_ENUM(a); #define EMe(a, b) TRACE_DEFINE_ENUM(a); P9_MSG_T /* * Now redefine the EM() and EMe() macros to map the enums to the strings * that will be printed in the output. */ #undef EM #undef EMe #define EM(a, b) { a, b }, #define EMe(a, b) { a, b } #define show_9p_op(type) \ __print_symbolic(type, P9_MSG_T) TRACE_EVENT(9p_client_req, TP_PROTO(struct p9_client *clnt, int8_t type, int tag), TP_ARGS(clnt, type, tag), TP_STRUCT__entry( __field( void *, clnt ) __field( __u8, type ) __field( __u32, tag ) ), TP_fast_assign( __entry->clnt = clnt; __entry->type = type; __entry->tag = tag; ), TP_printk("client %lu request %s tag %d", (long)__entry->clnt, show_9p_op(__entry->type), __entry->tag) ); TRACE_EVENT(9p_client_res, TP_PROTO(struct p9_client *clnt, int8_t type, int tag, int err), TP_ARGS(clnt, type, tag, err), TP_STRUCT__entry( __field( void *, clnt ) __field( __u8, type ) __field( __u32, tag ) __field( __u32, err ) ), TP_fast_assign( __entry->clnt = clnt; __entry->type = type; __entry->tag = tag; __entry->err = err; ), TP_printk("client %lu response %s tag %d err %d", (long)__entry->clnt, show_9p_op(__entry->type), __entry->tag, __entry->err) ); /* dump 32 bytes of protocol data */ #define P9_PROTO_DUMP_SZ 32 TRACE_EVENT(9p_protocol_dump, TP_PROTO(struct p9_client *clnt, struct p9_fcall *pdu), TP_ARGS(clnt, pdu), TP_STRUCT__entry( __field( void *, clnt ) __field( __u8, type ) __field( __u16, tag ) __array( unsigned char, line, P9_PROTO_DUMP_SZ ) ), TP_fast_assign( __entry->clnt = clnt; __entry->type = pdu->id; __entry->tag = pdu->tag; memcpy(__entry->line, pdu->sdata, P9_PROTO_DUMP_SZ); ), TP_printk("clnt %lu %s(tag = %d)\n%.3x: %16ph\n%.3x: %16ph\n", (unsigned long)__entry->clnt, show_9p_op(__entry->type), __entry->tag, 0, __entry->line, 16, __entry->line + 16) ); #endif /* _TRACE_9P_H */ /* This part must be outside protection */ #include <trace/define_trace.h>
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 /* SPDX-License-Identifier: GPL-2.0-only */ /* * pm_runtime.h - Device run-time power management helper functions. * * Copyright (C) 2009 Rafael J. Wysocki <rjw@sisk.pl> */ #ifndef _LINUX_PM_RUNTIME_H #define _LINUX_PM_RUNTIME_H #include <linux/device.h> #include <linux/notifier.h> #include <linux/pm.h> #include <linux/jiffies.h> /* Runtime PM flag argument bits */ #define RPM_ASYNC 0x01 /* Request is asynchronous */ #define RPM_NOWAIT 0x02 /* Don't wait for concurrent state change */ #define RPM_GET_PUT 0x04 /* Increment/decrement the usage_count */ #define RPM_AUTO 0x08 /* Use autosuspend_delay */ #ifdef CONFIG_PM extern struct workqueue_struct *pm_wq; static inline bool queue_pm_work(struct work_struct *work) { return queue_work(pm_wq, work); } extern int pm_generic_runtime_suspend(struct device *dev); extern int pm_generic_runtime_resume(struct device *dev); extern int pm_runtime_force_suspend(struct device *dev); extern int pm_runtime_force_resume(struct device *dev); extern int __pm_runtime_idle(struct device *dev, int rpmflags); extern int __pm_runtime_suspend(struct device *dev, int rpmflags); extern int __pm_runtime_resume(struct device *dev, int rpmflags); extern int pm_runtime_get_if_active(struct device *dev, bool ign_usage_count); extern int pm_schedule_suspend(struct device *dev, unsigned int delay); extern int __pm_runtime_set_status(struct device *dev, unsigned int status); extern int pm_runtime_barrier(struct device *dev); extern void pm_runtime_enable(struct device *dev); extern void __pm_runtime_disable(struct device *dev, bool check_resume); extern void pm_runtime_allow(struct device *dev); extern void pm_runtime_forbid(struct device *dev); extern void pm_runtime_no_callbacks(struct device *dev); extern void pm_runtime_irq_safe(struct device *dev); extern void __pm_runtime_use_autosuspend(struct device *dev, bool use); extern void pm_runtime_set_autosuspend_delay(struct device *dev, int delay); extern u64 pm_runtime_autosuspend_expiration(struct device *dev); extern void pm_runtime_update_max_time_suspended(struct device *dev, s64 delta_ns); extern void pm_runtime_set_memalloc_noio(struct device *dev, bool enable); extern void pm_runtime_get_suppliers(struct device *dev); extern void pm_runtime_put_suppliers(struct device *dev); extern void pm_runtime_new_link(struct device *dev); extern void pm_runtime_drop_link(struct device_link *link); /** * pm_runtime_get_if_in_use - Conditionally bump up runtime PM usage counter. * @dev: Target device. * * Increment the runtime PM usage counter of @dev if its runtime PM status is * %RPM_ACTIVE and its runtime PM usage counter is greater than 0. */ static inline int pm_runtime_get_if_in_use(struct device *dev) { return pm_runtime_get_if_active(dev, false); } /** * pm_suspend_ignore_children - Set runtime PM behavior regarding children. * @dev: Target device. * @enable: Whether or not to ignore possible dependencies on children. * * The dependencies of @dev on its children will not be taken into account by * the runtime PM framework going forward if @enable is %true, or they will * be taken into account otherwise. */ static inline void pm_suspend_ignore_children(struct device *dev, bool enable) { dev->power.ignore_children = enable; } /** * pm_runtime_get_noresume - Bump up runtime PM usage counter of a device. * @dev: Target device. */ static inline void pm_runtime_get_noresume(struct device *dev) { atomic_inc(&dev->power.usage_count); } /** * pm_runtime_put_noidle - Drop runtime PM usage counter of a device. * @dev: Target device. * * Decrement the runtime PM usage counter of @dev unless it is 0 already. */ static inline void pm_runtime_put_noidle(struct device *dev) { atomic_add_unless(&dev->power.usage_count, -1, 0); } /** * pm_runtime_suspended - Check whether or not a device is runtime-suspended. * @dev: Target device. * * Return %true if runtime PM is enabled for @dev and its runtime PM status is * %RPM_SUSPENDED, or %false otherwise. * * Note that the return value of this function can only be trusted if it is * called under the runtime PM lock of @dev or under conditions in which * runtime PM cannot be either disabled or enabled for @dev and its runtime PM * status cannot change. */ static inline bool pm_runtime_suspended(struct device *dev) { return dev->power.runtime_status == RPM_SUSPENDED && !dev->power.disable_depth; } /** * pm_runtime_active - Check whether or not a device is runtime-active. * @dev: Target device. * * Return %true if runtime PM is disabled for @dev or its runtime PM status is * %RPM_ACTIVE, or %false otherwise. * * Note that the return value of this function can only be trusted if it is * called under the runtime PM lock of @dev or under conditions in which * runtime PM cannot be either disabled or enabled for @dev and its runtime PM * status cannot change. */ static inline bool pm_runtime_active(struct device *dev) { return dev->power.runtime_status == RPM_ACTIVE || dev->power.disable_depth; } /** * pm_runtime_status_suspended - Check if runtime PM status is "suspended". * @dev: Target device. * * Return %true if the runtime PM status of @dev is %RPM_SUSPENDED, or %false * otherwise, regardless of whether or not runtime PM has been enabled for @dev. * * Note that the return value of this function can only be trusted if it is * called under the runtime PM lock of @dev or under conditions in which the * runtime PM status of @dev cannot change. */ static inline bool pm_runtime_status_suspended(struct device *dev) { return dev->power.runtime_status == RPM_SUSPENDED; } /** * pm_runtime_enabled - Check if runtime PM is enabled. * @dev: Target device. * * Return %true if runtime PM is enabled for @dev or %false otherwise. * * Note that the return value of this function can only be trusted if it is * called under the runtime PM lock of @dev or under conditions in which * runtime PM cannot be either disabled or enabled for @dev. */ static inline bool pm_runtime_enabled(struct device *dev) { return !dev->power.disable_depth; } /** * pm_runtime_has_no_callbacks - Check if runtime PM callbacks may be present. * @dev: Target device. * * Return %true if @dev is a special device without runtime PM callbacks or * %false otherwise. */ static inline bool pm_runtime_has_no_callbacks(struct device *dev) { return dev->power.no_callbacks; } /** * pm_runtime_mark_last_busy - Update the last access time of a device. * @dev: Target device. * * Update the last access time of @dev used by the runtime PM autosuspend * mechanism to the current time as returned by ktime_get_mono_fast_ns(). */ static inline void pm_runtime_mark_last_busy(struct device *dev) { WRITE_ONCE(dev->power.last_busy, ktime_get_mono_fast_ns()); } /** * pm_runtime_is_irq_safe - Check if runtime PM can work in interrupt context. * @dev: Target device. * * Return %true if @dev has been marked as an "IRQ-safe" device (with respect * to runtime PM), in which case its runtime PM callabcks can be expected to * work correctly when invoked from interrupt handlers. */ static inline bool pm_runtime_is_irq_safe(struct device *dev) { return dev->power.irq_safe; } extern u64 pm_runtime_suspended_time(struct device *dev); #else /* !CONFIG_PM */ static inline bool queue_pm_work(struct work_struct *work) { return false; } static inline int pm_generic_runtime_suspend(struct device *dev) { return 0; } static inline int pm_generic_runtime_resume(struct device *dev) { return 0; } static inline int pm_runtime_force_suspend(struct device *dev) { return 0; } static inline int pm_runtime_force_resume(struct device *dev) { return 0; } static inline int __pm_runtime_idle(struct device *dev, int rpmflags) { return -ENOSYS; } static inline int __pm_runtime_suspend(struct device *dev, int rpmflags) { return -ENOSYS; } static inline int __pm_runtime_resume(struct device *dev, int rpmflags) { return 1; } static inline int pm_schedule_suspend(struct device *dev, unsigned int delay) { return -ENOSYS; } static inline int pm_runtime_get_if_in_use(struct device *dev) { return -EINVAL; } static inline int pm_runtime_get_if_active(struct device *dev, bool ign_usage_count) { return -EINVAL; } static inline int __pm_runtime_set_status(struct device *dev, unsigned int status) { return 0; } static inline int pm_runtime_barrier(struct device *dev) { return 0; } static inline void pm_runtime_enable(struct device *dev) {} static inline void __pm_runtime_disable(struct device *dev, bool c) {} static inline void pm_runtime_allow(struct device *dev) {} static inline void pm_runtime_forbid(struct device *dev) {} static inline void pm_suspend_ignore_children(struct device *dev, bool enable) {} static inline void pm_runtime_get_noresume(struct device *dev) {} static inline void pm_runtime_put_noidle(struct device *dev) {} static inline bool pm_runtime_suspended(struct device *dev) { return false; } static inline bool pm_runtime_active(struct device *dev) { return true; } static inline bool pm_runtime_status_suspended(struct device *dev) { return false; } static inline bool pm_runtime_enabled(struct device *dev) { return false; } static inline void pm_runtime_no_callbacks(struct device *dev) {} static inline void pm_runtime_irq_safe(struct device *dev) {} static inline bool pm_runtime_is_irq_safe(struct device *dev) { return false; } static inline bool pm_runtime_has_no_callbacks(struct device *dev) { return false; } static inline void pm_runtime_mark_last_busy(struct device *dev) {} static inline void __pm_runtime_use_autosuspend(struct device *dev, bool use) {} static inline void pm_runtime_set_autosuspend_delay(struct device *dev, int delay) {} static inline u64 pm_runtime_autosuspend_expiration( struct device *dev) { return 0; } static inline void pm_runtime_set_memalloc_noio(struct device *dev, bool enable){} static inline void pm_runtime_get_suppliers(struct device *dev) {} static inline void pm_runtime_put_suppliers(struct device *dev) {} static inline void pm_runtime_new_link(struct device *dev) {} static inline void pm_runtime_drop_link(struct device_link *link) {} #endif /* !CONFIG_PM */ /** * pm_runtime_idle - Conditionally set up autosuspend of a device or suspend it. * @dev: Target device. * * Invoke the "idle check" callback of @dev and, depending on its return value, * set up autosuspend of @dev or suspend it (depending on whether or not * autosuspend has been enabled for it). */ static inline int pm_runtime_idle(struct device *dev) { return __pm_runtime_idle(dev, 0); } /** * pm_runtime_suspend - Suspend a device synchronously. * @dev: Target device. */ static inline int pm_runtime_suspend(struct device *dev) { return __pm_runtime_suspend(dev, 0); } /** * pm_runtime_autosuspend - Set up autosuspend of a device or suspend it. * @dev: Target device. * * Set up autosuspend of @dev or suspend it (depending on whether or not * autosuspend is enabled for it) without engaging its "idle check" callback. */ static inline int pm_runtime_autosuspend(struct device *dev) { return __pm_runtime_suspend(dev, RPM_AUTO); } /** * pm_runtime_resume - Resume a device synchronously. * @dev: Target device. */ static inline int pm_runtime_resume(struct device *dev) { return __pm_runtime_resume(dev, 0); } /** * pm_request_idle - Queue up "idle check" execution for a device. * @dev: Target device. * * Queue up a work item to run an equivalent of pm_runtime_idle() for @dev * asynchronously. */ static inline int pm_request_idle(struct device *dev) { return __pm_runtime_idle(dev, RPM_ASYNC); } /** * pm_request_resume - Queue up runtime-resume of a device. * @dev: Target device. */ static inline int pm_request_resume(struct device *dev) { return __pm_runtime_resume(dev, RPM_ASYNC); } /** * pm_request_autosuspend - Queue up autosuspend of a device. * @dev: Target device. * * Queue up a work item to run an equivalent pm_runtime_autosuspend() for @dev * asynchronously. */ static inline int pm_request_autosuspend(struct device *dev) { return __pm_runtime_suspend(dev, RPM_ASYNC | RPM_AUTO); } /** * pm_runtime_get - Bump up usage counter and queue up resume of a device. * @dev: Target device. * * Bump up the runtime PM usage counter of @dev and queue up a work item to * carry out runtime-resume of it. */ static inline int pm_runtime_get(struct device *dev) { return __pm_runtime_resume(dev, RPM_GET_PUT | RPM_ASYNC); } /** * pm_runtime_get_sync - Bump up usage counter of a device and resume it. * @dev: Target device. * * Bump up the runtime PM usage counter of @dev and carry out runtime-resume of * it synchronously. * * The possible return values of this function are the same as for * pm_runtime_resume() and the runtime PM usage counter of @dev remains * incremented in all cases, even if it returns an error code. */ static inline int pm_runtime_get_sync(struct device *dev) { return __pm_runtime_resume(dev, RPM_GET_PUT); } /** * pm_runtime_resume_and_get - Bump up usage counter of a device and resume it. * @dev: Target device. * * Resume @dev synchronously and if that is successful, increment its runtime * PM usage counter. Return 0 if the runtime PM usage counter of @dev has been * incremented or a negative error code otherwise. */ static inline int pm_runtime_resume_and_get(struct device *dev) { int ret; ret = __pm_runtime_resume(dev, RPM_GET_PUT); if (ret < 0) { pm_runtime_put_noidle(dev); return ret; } return 0; } /** * pm_runtime_put - Drop device usage counter and queue up "idle check" if 0. * @dev: Target device. * * Decrement the runtime PM usage counter of @dev and if it turns out to be * equal to 0, queue up a work item for @dev like in pm_request_idle(). */ static inline int pm_runtime_put(struct device *dev) { return __pm_runtime_idle(dev, RPM_GET_PUT | RPM_ASYNC); } /** * pm_runtime_put_autosuspend - Drop device usage counter and queue autosuspend if 0. * @dev: Target device. * * Decrement the runtime PM usage counter of @dev and if it turns out to be * equal to 0, queue up a work item for @dev like in pm_request_autosuspend(). */ static inline int pm_runtime_put_autosuspend(struct device *dev) { return __pm_runtime_suspend(dev, RPM_GET_PUT | RPM_ASYNC | RPM_AUTO); } /** * pm_runtime_put_sync - Drop device usage counter and run "idle check" if 0. * @dev: Target device. * * Decrement the runtime PM usage counter of @dev and if it turns out to be * equal to 0, invoke the "idle check" callback of @dev and, depending on its * return value, set up autosuspend of @dev or suspend it (depending on whether * or not autosuspend has been enabled for it). * * The possible return values of this function are the same as for * pm_runtime_idle() and the runtime PM usage counter of @dev remains * decremented in all cases, even if it returns an error code. */ static inline int pm_runtime_put_sync(struct device *dev) { return __pm_runtime_idle(dev, RPM_GET_PUT); } /** * pm_runtime_put_sync_suspend - Drop device usage counter and suspend if 0. * @dev: Target device. * * Decrement the runtime PM usage counter of @dev and if it turns out to be * equal to 0, carry out runtime-suspend of @dev synchronously. * * The possible return values of this function are the same as for * pm_runtime_suspend() and the runtime PM usage counter of @dev remains * decremented in all cases, even if it returns an error code. */ static inline int pm_runtime_put_sync_suspend(struct device *dev) { return __pm_runtime_suspend(dev, RPM_GET_PUT); } /** * pm_runtime_put_sync_autosuspend - Drop device usage counter and autosuspend if 0. * @dev: Target device. * * Decrement the runtime PM usage counter of @dev and if it turns out to be * equal to 0, set up autosuspend of @dev or suspend it synchronously (depending * on whether or not autosuspend has been enabled for it). * * The possible return values of this function are the same as for * pm_runtime_autosuspend() and the runtime PM usage counter of @dev remains * decremented in all cases, even if it returns an error code. */ static inline int pm_runtime_put_sync_autosuspend(struct device *dev) { return __pm_runtime_suspend(dev, RPM_GET_PUT | RPM_AUTO); } /** * pm_runtime_set_active - Set runtime PM status to "active". * @dev: Target device. * * Set the runtime PM status of @dev to %RPM_ACTIVE and ensure that dependencies * of it will be taken into account. * * It is not valid to call this function for devices with runtime PM enabled. */ static inline int pm_runtime_set_active(struct device *dev) { return __pm_runtime_set_status(dev, RPM_ACTIVE); } /** * pm_runtime_set_suspended - Set runtime PM status to "suspended". * @dev: Target device. * * Set the runtime PM status of @dev to %RPM_SUSPENDED and ensure that * dependencies of it will be taken into account. * * It is not valid to call this function for devices with runtime PM enabled. */ static inline int pm_runtime_set_suspended(struct device *dev) { return __pm_runtime_set_status(dev, RPM_SUSPENDED); } /** * pm_runtime_disable - Disable runtime PM for a device. * @dev: Target device. * * Prevent the runtime PM framework from working with @dev (by incrementing its * "blocking" counter). * * For each invocation of this function for @dev there must be a matching * pm_runtime_enable() call in order for runtime PM to be enabled for it. */ static inline void pm_runtime_disable(struct device *dev) { __pm_runtime_disable(dev, true); } /** * pm_runtime_use_autosuspend - Allow autosuspend to be used for a device. * @dev: Target device. * * Allow the runtime PM autosuspend mechanism to be used for @dev whenever * requested (or "autosuspend" will be handled as direct runtime-suspend for * it). */ static inline void pm_runtime_use_autosuspend(struct device *dev) { __pm_runtime_use_autosuspend(dev, true); } /** * pm_runtime_dont_use_autosuspend - Prevent autosuspend from being used. * @dev: Target device. * * Prevent the runtime PM autosuspend mechanism from being used for @dev which * means that "autosuspend" will be handled as direct runtime-suspend for it * going forward. */ static inline void pm_runtime_dont_use_autosuspend(struct device *dev) { __pm_runtime_use_autosuspend(dev, false); } #endif
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 /* SPDX-License-Identifier: GPL-2.0-only */ /* * Dynamic loading of modules into the kernel. * * Rewritten by Richard Henderson <rth@tamu.edu> Dec 1996 * Rewritten again by Rusty Russell, 2002 */ #ifndef _LINUX_MODULE_H #define _LINUX_MODULE_H #include <linux/list.h> #include <linux/stat.h> #include <linux/compiler.h> #include <linux/cache.h> #include <linux/kmod.h> #include <linux/init.h> #include <linux/elf.h> #include <linux/stringify.h> #include <linux/kobject.h> #include <linux/moduleparam.h> #include <linux/jump_label.h> #include <linux/export.h> #include <linux/rbtree_latch.h> #include <linux/error-injection.h> #include <linux/tracepoint-defs.h> #include <linux/srcu.h> #include <linux/static_call_types.h> #include <linux/percpu.h> #include <asm/module.h> /* Not Yet Implemented */ #define MODULE_SUPPORTED_DEVICE(name) #define MODULE_NAME_LEN MAX_PARAM_PREFIX_LEN struct modversion_info { unsigned long crc; char name[MODULE_NAME_LEN]; }; struct module; struct exception_table_entry; struct module_kobject { struct kobject kobj; struct module *mod; struct kobject *drivers_dir; struct module_param_attrs *mp; struct completion *kobj_completion; } __randomize_layout; struct module_attribute { struct attribute attr; ssize_t (*show)(struct module_attribute *, struct module_kobject *, char *); ssize_t (*store)(struct module_attribute *, struct module_kobject *, const char *, size_t count); void (*setup)(struct module *, const char *); int (*test)(struct module *); void (*free)(struct module *); }; struct module_version_attribute { struct module_attribute mattr; const char *module_name; const char *version; } __attribute__ ((__aligned__(sizeof(void *)))); extern ssize_t __modver_version_show(struct module_attribute *, struct module_kobject *, char *); extern struct module_attribute module_uevent; /* These are either module local, or the kernel's dummy ones. */ extern int init_module(void); extern void cleanup_module(void); #ifndef MODULE /** * module_init() - driver initialization entry point * @x: function to be run at kernel boot time or module insertion * * module_init() will either be called during do_initcalls() (if * builtin) or at module insertion time (if a module). There can only * be one per module. */ #define module_init(x) __initcall(x); /** * module_exit() - driver exit entry point * @x: function to be run when driver is removed * * module_exit() will wrap the driver clean-up code * with cleanup_module() when used with rmmod when * the driver is a module. If the driver is statically * compiled into the kernel, module_exit() has no effect. * There can only be one per module. */ #define module_exit(x) __exitcall(x); #else /* MODULE */ /* * In most cases loadable modules do not need custom * initcall levels. There are still some valid cases where * a driver may be needed early if built in, and does not * matter when built as a loadable module. Like bus * snooping debug drivers. */ #define early_initcall(fn) module_init(fn) #define core_initcall(fn) module_init(fn) #define core_initcall_sync(fn) module_init(fn) #define postcore_initcall(fn) module_init(fn) #define postcore_initcall_sync(fn) module_init(fn) #define arch_initcall(fn) module_init(fn) #define subsys_initcall(fn) module_init(fn) #define subsys_initcall_sync(fn) module_init(fn) #define fs_initcall(fn) module_init(fn) #define fs_initcall_sync(fn) module_init(fn) #define rootfs_initcall(fn) module_init(fn) #define device_initcall(fn) module_init(fn) #define device_initcall_sync(fn) module_init(fn) #define late_initcall(fn) module_init(fn) #define late_initcall_sync(fn) module_init(fn) #define console_initcall(fn) module_init(fn) /* Each module must use one module_init(). */ #define module_init(initfn) \ static inline initcall_t __maybe_unused __inittest(void) \ { return initfn; } \ int init_module(void) __copy(initfn) __attribute__((alias(#initfn))); /* This is only required if you want to be unloadable. */ #define module_exit(exitfn) \ static inline exitcall_t __maybe_unused __exittest(void) \ { return exitfn; } \ void cleanup_module(void) __copy(exitfn) __attribute__((alias(#exitfn))); #endif /* This means "can be init if no module support, otherwise module load may call it." */ #ifdef CONFIG_MODULES #define __init_or_module #define __initdata_or_module #define __initconst_or_module #define __INIT_OR_MODULE .text #define __INITDATA_OR_MODULE .data #define __INITRODATA_OR_MODULE .section ".rodata","a",%progbits #else #define __init_or_module __init #define __initdata_or_module __initdata #define __initconst_or_module __initconst #define __INIT_OR_MODULE __INIT #define __INITDATA_OR_MODULE __INITDATA #define __INITRODATA_OR_MODULE __INITRODATA #endif /*CONFIG_MODULES*/ /* Generic info of form tag = "info" */ #define MODULE_INFO(tag, info) __MODULE_INFO(tag, tag, info) /* For userspace: you can also call me... */ #define MODULE_ALIAS(_alias) MODULE_INFO(alias, _alias) /* Soft module dependencies. See man modprobe.d for details. * Example: MODULE_SOFTDEP("pre: module-foo module-bar post: module-baz") */ #define MODULE_SOFTDEP(_softdep) MODULE_INFO(softdep, _softdep) /* * MODULE_FILE is used for generating modules.builtin * So, make it no-op when this is being built as a module */ #ifdef MODULE #define MODULE_FILE #else #define MODULE_FILE MODULE_INFO(file, KBUILD_MODFILE); #endif /* * The following license idents are currently accepted as indicating free * software modules * * "GPL" [GNU Public License v2] * "GPL v2" [GNU Public License v2] * "GPL and additional rights" [GNU Public License v2 rights and more] * "Dual BSD/GPL" [GNU Public License v2 * or BSD license choice] * "Dual MIT/GPL" [GNU Public License v2 * or MIT license choice] * "Dual MPL/GPL" [GNU Public License v2 * or Mozilla license choice] * * The following other idents are available * * "Proprietary" [Non free products] * * Both "GPL v2" and "GPL" (the latter also in dual licensed strings) are * merely stating that the module is licensed under the GPL v2, but are not * telling whether "GPL v2 only" or "GPL v2 or later". The reason why there * are two variants is a historic and failed attempt to convey more * information in the MODULE_LICENSE string. For module loading the * "only/or later" distinction is completely irrelevant and does neither * replace the proper license identifiers in the corresponding source file * nor amends them in any way. The sole purpose is to make the * 'Proprietary' flagging work and to refuse to bind symbols which are * exported with EXPORT_SYMBOL_GPL when a non free module is loaded. * * In the same way "BSD" is not a clear license information. It merely * states, that the module is licensed under one of the compatible BSD * license variants. The detailed and correct license information is again * to be found in the corresponding source files. * * There are dual licensed components, but when running with Linux it is the * GPL that is relevant so this is a non issue. Similarly LGPL linked with GPL * is a GPL combined work. * * This exists for several reasons * 1. So modinfo can show license info for users wanting to vet their setup * is free * 2. So the community can ignore bug reports including proprietary modules * 3. So vendors can do likewise based on their own policies */ #define MODULE_LICENSE(_license) MODULE_FILE MODULE_INFO(license, _license) /* * Author(s), use "Name <email>" or just "Name", for multiple * authors use multiple MODULE_AUTHOR() statements/lines. */ #define MODULE_AUTHOR(_author) MODULE_INFO(author, _author) /* What your module does. */ #define MODULE_DESCRIPTION(_description) MODULE_INFO(description, _description) #ifdef MODULE /* Creates an alias so file2alias.c can find device table. */ #define MODULE_DEVICE_TABLE(type, name) \ extern typeof(name) __mod_##type##__##name##_device_table \ __attribute__ ((unused, alias(__stringify(name)))) #else /* !MODULE */ #define MODULE_DEVICE_TABLE(type, name) #endif /* Version of form [<epoch>:]<version>[-<extra-version>]. * Or for CVS/RCS ID version, everything but the number is stripped. * <epoch>: A (small) unsigned integer which allows you to start versions * anew. If not mentioned, it's zero. eg. "2:1.0" is after * "1:2.0". * <version>: The <version> may contain only alphanumerics and the * character `.'. Ordered by numeric sort for numeric parts, * ascii sort for ascii parts (as per RPM or DEB algorithm). * <extraversion>: Like <version>, but inserted for local * customizations, eg "rh3" or "rusty1". * Using this automatically adds a checksum of the .c files and the * local headers in "srcversion". */ #if defined(MODULE) || !defined(CONFIG_SYSFS) #define MODULE_VERSION(_version) MODULE_INFO(version, _version) #else #define MODULE_VERSION(_version) \ MODULE_INFO(version, _version); \ static struct module_version_attribute ___modver_attr = { \ .mattr = { \ .attr = { \ .name = "version", \ .mode = S_IRUGO, \ }, \ .show = __modver_version_show, \ }, \ .module_name = KBUILD_MODNAME, \ .version = _version, \ }; \ static const struct module_version_attribute \ __used __section("__modver") \ * __moduleparam_const __modver_attr = &___modver_attr #endif /* Optional firmware file (or files) needed by the module * format is simply firmware file name. Multiple firmware * files require multiple MODULE_FIRMWARE() specifiers */ #define MODULE_FIRMWARE(_firmware) MODULE_INFO(firmware, _firmware) #define MODULE_IMPORT_NS(ns) MODULE_INFO(import_ns, #ns) struct notifier_block; #ifdef CONFIG_MODULES extern int modules_disabled; /* for sysctl */ /* Get/put a kernel symbol (calls must be symmetric) */ void *__symbol_get(const char *symbol); void *__symbol_get_gpl(const char *symbol); #define symbol_get(x) ((typeof(&x))(__symbol_get(__stringify(x)))) /* modules using other modules: kdb wants to see this. */ struct module_use { struct list_head source_list; struct list_head target_list; struct module *source, *target; }; enum module_state { MODULE_STATE_LIVE, /* Normal state. */ MODULE_STATE_COMING, /* Full formed, running module_init. */ MODULE_STATE_GOING, /* Going away. */ MODULE_STATE_UNFORMED, /* Still setting it up. */ }; struct mod_tree_node { struct module *mod; struct latch_tree_node node; }; struct module_layout { /* The actual code + data. */ void *base; /* Total size. */ unsigned int size; /* The size of the executable code. */ unsigned int text_size; /* Size of RO section of the module (text+rodata) */ unsigned int ro_size; /* Size of RO after init section */ unsigned int ro_after_init_size; #ifdef CONFIG_MODULES_TREE_LOOKUP struct mod_tree_node mtn; #endif }; #ifdef CONFIG_MODULES_TREE_LOOKUP /* Only touch one cacheline for common rbtree-for-core-layout case. */ #define __module_layout_align ____cacheline_aligned #else #define __module_layout_align #endif struct mod_kallsyms { Elf_Sym *symtab; unsigned int num_symtab; char *strtab; char *typetab; }; #ifdef CONFIG_LIVEPATCH struct klp_modinfo { Elf_Ehdr hdr; Elf_Shdr *sechdrs; char *secstrings; unsigned int symndx; }; #endif struct module { enum module_state state; /* Member of list of modules */ struct list_head list; /* Unique handle for this module */ char name[MODULE_NAME_LEN]; /* Sysfs stuff. */ struct module_kobject mkobj; struct module_attribute *modinfo_attrs; const char *version; const char *srcversion; struct kobject *holders_dir; /* Exported symbols */ const struct kernel_symbol *syms; const s32 *crcs; unsigned int num_syms; /* Kernel parameters. */ #ifdef CONFIG_SYSFS struct mutex param_lock; #endif struct kernel_param *kp; unsigned int num_kp; /* GPL-only exported symbols. */ unsigned int num_gpl_syms; const struct kernel_symbol *gpl_syms; const s32 *gpl_crcs; bool using_gplonly_symbols; #ifdef CONFIG_UNUSED_SYMBOLS /* unused exported symbols. */ const struct kernel_symbol *unused_syms; const s32 *unused_crcs; unsigned int num_unused_syms; /* GPL-only, unused exported symbols. */ unsigned int num_unused_gpl_syms; const struct kernel_symbol *unused_gpl_syms; const s32 *unused_gpl_crcs; #endif #ifdef CONFIG_MODULE_SIG /* Signature was verified. */ bool sig_ok; #endif bool async_probe_requested; /* symbols that will be GPL-only in the near future. */ const struct kernel_symbol *gpl_future_syms; const s32 *gpl_future_crcs; unsigned int num_gpl_future_syms; /* Exception table */ unsigned int num_exentries; struct exception_table_entry *extable; /* Startup function. */ int (*init)(void); /* Core layout: rbtree is accessed frequently, so keep together. */ struct module_layout core_layout __module_layout_align; struct module_layout init_layout; /* Arch-specific module values */ struct mod_arch_specific arch; unsigned long taints; /* same bits as kernel:taint_flags */ #ifdef CONFIG_GENERIC_BUG /* Support for BUG */ unsigned num_bugs; struct list_head bug_list; struct bug_entry *bug_table; #endif #ifdef CONFIG_KALLSYMS /* Protected by RCU and/or module_mutex: use rcu_dereference() */ struct mod_kallsyms __rcu *kallsyms; struct mod_kallsyms core_kallsyms; /* Section attributes */ struct module_sect_attrs *sect_attrs; /* Notes attributes */ struct module_notes_attrs *notes_attrs; #endif /* The command line arguments (may be mangled). People like keeping pointers to this stuff */ char *args; #ifdef CONFIG_SMP /* Per-cpu data. */ void __percpu *percpu; unsigned int percpu_size; #endif void *noinstr_text_start; unsigned int noinstr_text_size; #ifdef CONFIG_TRACEPOINTS unsigned int num_tracepoints; tracepoint_ptr_t *tracepoints_ptrs; #endif #ifdef CONFIG_TREE_SRCU unsigned int num_srcu_structs; struct srcu_struct **srcu_struct_ptrs; #endif #ifdef CONFIG_BPF_EVENTS unsigned int num_bpf_raw_events; struct bpf_raw_event_map *bpf_raw_events; #endif #ifdef CONFIG_JUMP_LABEL struct jump_entry *jump_entries; unsigned int num_jump_entries; #endif #ifdef CONFIG_TRACING unsigned int num_trace_bprintk_fmt; const char **trace_bprintk_fmt_start; #endif #ifdef CONFIG_EVENT_TRACING struct trace_event_call **trace_events; unsigned int num_trace_events; struct trace_eval_map **trace_evals; unsigned int num_trace_evals; #endif #ifdef CONFIG_FTRACE_MCOUNT_RECORD unsigned int num_ftrace_callsites; unsigned long *ftrace_callsites; #endif #ifdef CONFIG_KPROBES void *kprobes_text_start; unsigned int kprobes_text_size; unsigned long *kprobe_blacklist; unsigned int num_kprobe_blacklist; #endif #ifdef CONFIG_HAVE_STATIC_CALL_INLINE int num_static_call_sites; struct static_call_site *static_call_sites; #endif #ifdef CONFIG_LIVEPATCH bool klp; /* Is this a livepatch module? */ bool klp_alive; /* Elf information */ struct klp_modinfo *klp_info; #endif #ifdef CONFIG_MODULE_UNLOAD /* What modules depend on me? */ struct list_head source_list; /* What modules do I depend on? */ struct list_head target_list; /* Destruction function. */ void (*exit)(void); atomic_t refcnt; #endif #ifdef CONFIG_CONSTRUCTORS /* Constructor functions. */ ctor_fn_t *ctors; unsigned int num_ctors; #endif #ifdef CONFIG_FUNCTION_ERROR_INJECTION struct error_injection_entry *ei_funcs; unsigned int num_ei_funcs; #endif } ____cacheline_aligned __randomize_layout; #ifndef MODULE_ARCH_INIT #define MODULE_ARCH_INIT {} #endif #ifndef HAVE_ARCH_KALLSYMS_SYMBOL_VALUE static inline unsigned long kallsyms_symbol_value(const Elf_Sym *sym) { return sym->st_value; } #endif extern struct mutex module_mutex; /* FIXME: It'd be nice to isolate modules during init, too, so they aren't used before they (may) fail. But presently too much code (IDE & SCSI) require entry into the module during init.*/ static inline bool module_is_live(struct module *mod) { return mod->state != MODULE_STATE_GOING; } struct module *__module_text_address(unsigned long addr); struct module *__module_address(unsigned long addr); bool is_module_address(unsigned long addr); bool __is_module_percpu_address(unsigned long addr, unsigned long *can_addr); bool is_module_percpu_address(unsigned long addr); bool is_module_text_address(unsigned long addr); static inline bool within_module_core(unsigned long addr, const struct module *mod) { return (unsigned long)mod->core_layout.base <= addr && addr < (unsigned long)mod->core_layout.base + mod->core_layout.size; } static inline bool within_module_init(unsigned long addr, const struct module *mod) { return (unsigned long)mod->init_layout.base <= addr && addr < (unsigned long)mod->init_layout.base + mod->init_layout.size; } static inline bool within_module(unsigned long addr, const struct module *mod) { return within_module_init(addr, mod) || within_module_core(addr, mod); } /* Search for module by name: must hold module_mutex. */ struct module *find_module(const char *name); struct symsearch { const struct kernel_symbol *start, *stop; const s32 *crcs; enum mod_license { NOT_GPL_ONLY, GPL_ONLY, WILL_BE_GPL_ONLY, } license; bool unused; }; /* Returns 0 and fills in value, defined and namebuf, or -ERANGE if symnum out of range. */ int module_get_kallsym(unsigned int symnum, unsigned long *value, char *type, char *name, char *module_name, int *exported); /* Look for this name: can be of form module:name. */ unsigned long module_kallsyms_lookup_name(const char *name); int module_kallsyms_on_each_symbol(int (*fn)(void *, const char *, struct module *, unsigned long), void *data); extern void __noreturn __module_put_and_exit(struct module *mod, long code); #define module_put_and_exit(code) __module_put_and_exit(THIS_MODULE, code) #ifdef CONFIG_MODULE_UNLOAD int module_refcount(struct module *mod); void __symbol_put(const char *symbol); #define symbol_put(x) __symbol_put(__stringify(x)) void symbol_put_addr(void *addr); /* Sometimes we know we already have a refcount, and it's easier not to handle the error case (which only happens with rmmod --wait). */ extern void __module_get(struct module *module); /* This is the Right Way to get a module: if it fails, it's being removed, * so pretend it's not there. */ extern bool try_module_get(struct module *module); extern void module_put(struct module *module); #else /*!CONFIG_MODULE_UNLOAD*/ static inline bool try_module_get(struct module *module) { return !module || module_is_live(module); } static inline void module_put(struct module *module) { } static inline void __module_get(struct module *module) { } #define symbol_put(x) do { } while (0) #define symbol_put_addr(p) do { } while (0) #endif /* CONFIG_MODULE_UNLOAD */ /* This is a #define so the string doesn't get put in every .o file */ #define module_name(mod) \ ({ \ struct module *__mod = (mod); \ __mod ? __mod->name : "kernel"; \ }) /* Dereference module function descriptor */ void *dereference_module_function_descriptor(struct module *mod, void *ptr); /* For kallsyms to ask for address resolution. namebuf should be at * least KSYM_NAME_LEN long: a pointer to namebuf is returned if * found, otherwise NULL. */ const char *module_address_lookup(unsigned long addr, unsigned long *symbolsize, unsigned long *offset, char **modname, char *namebuf); int lookup_module_symbol_name(unsigned long addr, char *symname); int lookup_module_symbol_attrs(unsigned long addr, unsigned long *size, unsigned long *offset, char *modname, char *name); int register_module_notifier(struct notifier_block *nb); int unregister_module_notifier(struct notifier_block *nb); extern void print_modules(void); static inline bool module_requested_async_probing(struct module *module) { return module && module->async_probe_requested; } #ifdef CONFIG_LIVEPATCH static inline bool is_livepatch_module(struct module *mod) { return mod->klp; } #else /* !CONFIG_LIVEPATCH */ static inline bool is_livepatch_module(struct module *mod) { return false; } #endif /* CONFIG_LIVEPATCH */ bool is_module_sig_enforced(void); void set_module_sig_enforced(void); #else /* !CONFIG_MODULES... */ static inline struct module *__module_address(unsigned long addr) { return NULL; } static inline struct module *__module_text_address(unsigned long addr) { return NULL; } static inline bool is_module_address(unsigned long addr) { return false; } static inline bool is_module_percpu_address(unsigned long addr) { return false; } static inline bool __is_module_percpu_address(unsigned long addr, unsigned long *can_addr) { return false; } static inline bool is_module_text_address(unsigned long addr) { return false; } static inline bool within_module_core(unsigned long addr, const struct module *mod) { return false; } static inline bool within_module_init(unsigned long addr, const struct module *mod) { return false; } static inline bool within_module(unsigned long addr, const struct module *mod) { return false; } /* Get/put a kernel symbol (calls should be symmetric) */ #define symbol_get(x) ({ extern typeof(x) x __attribute__((weak,visibility("hidden"))); &(x); }) #define symbol_put(x) do { } while (0) #define symbol_put_addr(x) do { } while (0) static inline void __module_get(struct module *module) { } static inline bool try_module_get(struct module *module) { return true; } static inline void module_put(struct module *module) { } #define module_name(mod) "kernel" /* For kallsyms to ask for address resolution. NULL means not found. */ static inline const char *module_address_lookup(unsigned long addr, unsigned long *symbolsize, unsigned long *offset, char **modname, char *namebuf) { return NULL; } static inline int lookup_module_symbol_name(unsigned long addr, char *symname) { return -ERANGE; } static inline int lookup_module_symbol_attrs(unsigned long addr, unsigned long *size, unsigned long *offset, char *modname, char *name) { return -ERANGE; } static inline int module_get_kallsym(unsigned int symnum, unsigned long *value, char *type, char *name, char *module_name, int *exported) { return -ERANGE; } static inline unsigned long module_kallsyms_lookup_name(const char *name) { return 0; } static inline int module_kallsyms_on_each_symbol(int (*fn)(void *, const char *, struct module *, unsigned long), void *data) { return 0; } static inline int register_module_notifier(struct notifier_block *nb) { /* no events will happen anyway, so this can always succeed */ return 0; } static inline int unregister_module_notifier(struct notifier_block *nb) { return 0; } #define module_put_and_exit(code) do_exit(code) static inline void print_modules(void) { } static inline bool module_requested_async_probing(struct module *module) { return false; } static inline bool is_module_sig_enforced(void) { return false; } static inline void set_module_sig_enforced(void) { } /* Dereference module function descriptor */ static inline void *dereference_module_function_descriptor(struct module *mod, void *ptr) { return ptr; } #endif /* CONFIG_MODULES */ #ifdef CONFIG_SYSFS extern struct kset *module_kset; extern struct kobj_type module_ktype; extern int module_sysfs_initialized; #endif /* CONFIG_SYSFS */ #define symbol_request(x) try_then_request_module(symbol_get(x), "symbol:" #x) /* BELOW HERE ALL THESE ARE OBSOLETE AND WILL VANISH */ #define __MODULE_STRING(x) __stringify(x) #ifdef CONFIG_GENERIC_BUG void module_bug_finalize(const Elf_Ehdr *, const Elf_Shdr *, struct module *); void module_bug_cleanup(struct module *); #else /* !CONFIG_GENERIC_BUG */ static inline void module_bug_finalize(const Elf_Ehdr *hdr, const Elf_Shdr *sechdrs, struct module *mod) { } static inline void module_bug_cleanup(struct module *mod) {} #endif /* CONFIG_GENERIC_BUG */ #ifdef CONFIG_RETPOLINE extern bool retpoline_module_ok(bool has_retpoline); #else static inline bool retpoline_module_ok(bool has_retpoline) { return true; } #endif #ifdef CONFIG_MODULE_SIG static inline bool module_sig_ok(struct module *module) { return module->sig_ok; } #else /* !CONFIG_MODULE_SIG */ static inline bool module_sig_ok(struct module *module) { return true; } #endif /* CONFIG_MODULE_SIG */ #endif /* _LINUX_MODULE_H */
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 /* SPDX-License-Identifier: GPL-2.0 */ #ifndef _DELAYED_CALL_H #define _DELAYED_CALL_H /* * Poor man's closures; I wish we could've done them sanely polymorphic, * but... */ struct delayed_call { void (*fn)(void *); void *arg; }; #define DEFINE_DELAYED_CALL(name) struct delayed_call name = {NULL, NULL} /* I really wish we had closures with sane typechecking... */ static inline void set_delayed_call(struct delayed_call *call, void (*fn)(void *), void *arg) { call->fn = fn; call->arg = arg; } static inline void do_delayed_call(struct delayed_call *call) { if (call->fn) call->fn(call->arg); } static inline void clear_delayed_call(struct delayed_call *call) { call->fn = NULL; } #endif
1 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 /* SPDX-License-Identifier: GPL-2.0 */ #ifndef _ASM_X86_PAGE_64_H #define _ASM_X86_PAGE_64_H #include <asm/page_64_types.h> #ifndef __ASSEMBLY__ #include <asm/alternative.h> /* duplicated to the one in bootmem.h */ extern unsigned long max_pfn; extern unsigned long phys_base; extern unsigned long page_offset_base; extern unsigned long vmalloc_base; extern unsigned long vmemmap_base; static inline unsigned long __phys_addr_nodebug(unsigned long x) { unsigned long y = x - __START_KERNEL_map; /* use the carry flag to determine if x was < __START_KERNEL_map */ x = y + ((x > y) ? phys_base : (__START_KERNEL_map - PAGE_OFFSET)); return x; } #ifdef CONFIG_DEBUG_VIRTUAL extern unsigned long __phys_addr(unsigned long); extern unsigned long __phys_addr_symbol(unsigned long); #else #define __phys_addr(x) __phys_addr_nodebug(x) #define __phys_addr_symbol(x) \ ((unsigned long)(x) - __START_KERNEL_map + phys_base) #endif #define __phys_reloc_hide(x) (x) #ifdef CONFIG_FLATMEM #define pfn_valid(pfn) ((pfn) < max_pfn) #endif void clear_page_orig(void *page); void clear_page_rep(void *page); void clear_page_erms(void *page); static inline void clear_page(void *page) { alternative_call_2(clear_page_orig, clear_page_rep, X86_FEATURE_REP_GOOD, clear_page_erms, X86_FEATURE_ERMS, "=D" (page), "0" (page) : "cc", "memory", "rax", "rcx"); } void copy_page(void *to, void *from); #endif /* !__ASSEMBLY__ */ #ifdef CONFIG_X86_VSYSCALL_EMULATION # define __HAVE_ARCH_GATE_AREA 1 #endif #endif /* _ASM_X86_PAGE_64_H */
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 /* SPDX-License-Identifier: GPL-2.0 */ /* * ioport.h Definitions of routines for detecting, reserving and * allocating system resources. * * Authors: Linus Torvalds */ #ifndef _LINUX_IOPORT_H #define _LINUX_IOPORT_H #ifndef __ASSEMBLY__ #include <linux/compiler.h> #include <linux/types.h> #include <linux/bits.h> /* * Resources are tree-like, allowing * nesting etc.. */ struct resource { resource_size_t start; resource_size_t end; const char *name; unsigned long flags; unsigned long desc; struct resource *parent, *sibling, *child; }; /* * IO resources have these defined flags. * * PCI devices expose these flags to userspace in the "resource" sysfs file, * so don't move them. */ #define IORESOURCE_BITS 0x000000ff /* Bus-specific bits */ #define IORESOURCE_TYPE_BITS 0x00001f00 /* Resource type */ #define IORESOURCE_IO 0x00000100 /* PCI/ISA I/O ports */ #define IORESOURCE_MEM 0x00000200 #define IORESOURCE_REG 0x00000300 /* Register offsets */ #define IORESOURCE_IRQ 0x00000400 #define IORESOURCE_DMA 0x00000800 #define IORESOURCE_BUS 0x00001000 #define IORESOURCE_PREFETCH 0x00002000 /* No side effects */ #define IORESOURCE_READONLY 0x00004000 #define IORESOURCE_CACHEABLE 0x00008000 #define IORESOURCE_RANGELENGTH 0x00010000 #define IORESOURCE_SHADOWABLE 0x00020000 #define IORESOURCE_SIZEALIGN 0x00040000 /* size indicates alignment */ #define IORESOURCE_STARTALIGN 0x00080000 /* start field is alignment */ #define IORESOURCE_MEM_64 0x00100000 #define IORESOURCE_WINDOW 0x00200000 /* forwarded by bridge */ #define IORESOURCE_MUXED 0x00400000 /* Resource is software muxed */ #define IORESOURCE_EXT_TYPE_BITS 0x01000000 /* Resource extended types */ #define IORESOURCE_SYSRAM 0x01000000 /* System RAM (modifier) */ /* IORESOURCE_SYSRAM specific bits. */ #define IORESOURCE_SYSRAM_DRIVER_MANAGED 0x02000000 /* Always detected via a driver. */ #define IORESOURCE_SYSRAM_MERGEABLE 0x04000000 /* Resource can be merged. */ #define IORESOURCE_EXCLUSIVE 0x08000000 /* Userland may not map this resource */ #define IORESOURCE_DISABLED 0x10000000 #define IORESOURCE_UNSET 0x20000000 /* No address assigned yet */ #define IORESOURCE_AUTO 0x40000000 #define IORESOURCE_BUSY 0x80000000 /* Driver has marked this resource busy */ /* I/O resource extended types */ #define IORESOURCE_SYSTEM_RAM (IORESOURCE_MEM|IORESOURCE_SYSRAM) /* PnP IRQ specific bits (IORESOURCE_BITS) */ #define IORESOURCE_IRQ_HIGHEDGE (1<<0) #define IORESOURCE_IRQ_LOWEDGE (1<<1) #define IORESOURCE_IRQ_HIGHLEVEL (1<<2) #define IORESOURCE_IRQ_LOWLEVEL (1<<3) #define IORESOURCE_IRQ_SHAREABLE (1<<4) #define IORESOURCE_IRQ_OPTIONAL (1<<5) /* PnP DMA specific bits (IORESOURCE_BITS) */ #define IORESOURCE_DMA_TYPE_MASK (3<<0) #define IORESOURCE_DMA_8BIT (0<<0) #define IORESOURCE_DMA_8AND16BIT (1<<0) #define IORESOURCE_DMA_16BIT (2<<0) #define IORESOURCE_DMA_MASTER (1<<2) #define IORESOURCE_DMA_BYTE (1<<3) #define IORESOURCE_DMA_WORD (1<<4) #define IORESOURCE_DMA_SPEED_MASK (3<<6) #define IORESOURCE_DMA_COMPATIBLE (0<<6) #define IORESOURCE_DMA_TYPEA (1<<6) #define IORESOURCE_DMA_TYPEB (2<<6) #define IORESOURCE_DMA_TYPEF (3<<6) /* PnP memory I/O specific bits (IORESOURCE_BITS) */ #define IORESOURCE_MEM_WRITEABLE (1<<0) /* dup: IORESOURCE_READONLY */ #define IORESOURCE_MEM_CACHEABLE (1<<1) /* dup: IORESOURCE_CACHEABLE */ #define IORESOURCE_MEM_RANGELENGTH (1<<2) /* dup: IORESOURCE_RANGELENGTH */ #define IORESOURCE_MEM_TYPE_MASK (3<<3) #define IORESOURCE_MEM_8BIT (0<<3) #define IORESOURCE_MEM_16BIT (1<<3) #define IORESOURCE_MEM_8AND16BIT (2<<3) #define IORESOURCE_MEM_32BIT (3<<3) #define IORESOURCE_MEM_SHADOWABLE (1<<5) /* dup: IORESOURCE_SHADOWABLE */ #define IORESOURCE_MEM_EXPANSIONROM (1<<6) /* PnP I/O specific bits (IORESOURCE_BITS) */ #define IORESOURCE_IO_16BIT_ADDR (1<<0) #define IORESOURCE_IO_FIXED (1<<1) #define IORESOURCE_IO_SPARSE (1<<2) /* PCI ROM control bits (IORESOURCE_BITS) */ #define IORESOURCE_ROM_ENABLE (1<<0) /* ROM is enabled, same as PCI_ROM_ADDRESS_ENABLE */ #define IORESOURCE_ROM_SHADOW (1<<1) /* Use RAM image, not ROM BAR */ /* PCI control bits. Shares IORESOURCE_BITS with above PCI ROM. */ #define IORESOURCE_PCI_FIXED (1<<4) /* Do not move resource */ #define IORESOURCE_PCI_EA_BEI (1<<5) /* BAR Equivalent Indicator */ /* * I/O Resource Descriptors * * Descriptors are used by walk_iomem_res_desc() and region_intersects() * for searching a specific resource range in the iomem table. Assign * a new descriptor when a resource range supports the search interfaces. * Otherwise, resource.desc must be set to IORES_DESC_NONE (0). */ enum { IORES_DESC_NONE = 0, IORES_DESC_CRASH_KERNEL = 1, IORES_DESC_ACPI_TABLES = 2, IORES_DESC_ACPI_NV_STORAGE = 3, IORES_DESC_PERSISTENT_MEMORY = 4, IORES_DESC_PERSISTENT_MEMORY_LEGACY = 5, IORES_DESC_DEVICE_PRIVATE_MEMORY = 6, IORES_DESC_RESERVED = 7, IORES_DESC_SOFT_RESERVED = 8, }; /* * Flags controlling ioremap() behavior. */ enum { IORES_MAP_SYSTEM_RAM = BIT(0), IORES_MAP_ENCRYPTED = BIT(1), }; /* helpers to define resources */ #define DEFINE_RES_NAMED(_start, _size, _name, _flags) \ { \ .start = (_start), \ .end = (_start) + (_size) - 1, \ .name = (_name), \ .flags = (_flags), \ .desc = IORES_DESC_NONE, \ } #define DEFINE_RES_IO_NAMED(_start, _size, _name) \ DEFINE_RES_NAMED((_start), (_size), (_name), IORESOURCE_IO) #define DEFINE_RES_IO(_start, _size) \ DEFINE_RES_IO_NAMED((_start), (_size), NULL) #define DEFINE_RES_MEM_NAMED(_start, _size, _name) \ DEFINE_RES_NAMED((_start), (_size), (_name), IORESOURCE_MEM) #define DEFINE_RES_MEM(_start, _size) \ DEFINE_RES_MEM_NAMED((_start), (_size), NULL) #define DEFINE_RES_IRQ_NAMED(_irq, _name) \ DEFINE_RES_NAMED((_irq), 1, (_name), IORESOURCE_IRQ) #define DEFINE_RES_IRQ(_irq) \ DEFINE_RES_IRQ_NAMED((_irq), NULL) #define DEFINE_RES_DMA_NAMED(_dma, _name) \ DEFINE_RES_NAMED((_dma), 1, (_name), IORESOURCE_DMA) #define DEFINE_RES_DMA(_dma) \ DEFINE_RES_DMA_NAMED((_dma), NULL) /* PC/ISA/whatever - the normal PC address spaces: IO and memory */ extern struct resource ioport_resource; extern struct resource iomem_resource; extern struct resource *request_resource_conflict(struct resource *root, struct resource *new); extern int request_resource(struct resource *root, struct resource *new); extern int release_resource(struct resource *new); void release_child_resources(struct resource *new); extern void reserve_region_with_split(struct resource *root, resource_size_t start, resource_size_t end, const char *name); extern struct resource *insert_resource_conflict(struct resource *parent, struct resource *new); extern int insert_resource(struct resource *parent, struct resource *new); extern void insert_resource_expand_to_fit(struct resource *root, struct resource *new); extern int remove_resource(struct resource *old); extern void arch_remove_reservations(struct resource *avail); extern int allocate_resource(struct resource *root, struct resource *new, resource_size_t size, resource_size_t min, resource_size_t max, resource_size_t align, resource_size_t (*alignf)(void *, const struct resource *, resource_size_t, resource_size_t), void *alignf_data); struct resource *lookup_resource(struct resource *root, resource_size_t start); int adjust_resource(struct resource *res, resource_size_t start, resource_size_t size); resource_size_t resource_alignment(struct resource *res); static inline resource_size_t resource_size(const struct resource *res) { return res->end - res->start + 1; } static inline unsigned long resource_type(const struct resource *res) { return res->flags & IORESOURCE_TYPE_BITS; } static inline unsigned long resource_ext_type(const struct resource *res) { return res->flags & IORESOURCE_EXT_TYPE_BITS; } /* True iff r1 completely contains r2 */ static inline bool resource_contains(struct resource *r1, struct resource *r2) { if (resource_type(r1) != resource_type(r2)) return false; if (r1->flags & IORESOURCE_UNSET || r2->flags & IORESOURCE_UNSET) return false; return r1->start <= r2->start && r1->end >= r2->end; } /* Convenience shorthand with allocation */ #define request_region(start,n,name) __request_region(&ioport_resource, (start), (n), (name), 0) #define request_muxed_region(start,n,name) __request_region(&ioport_resource, (start), (n), (name), IORESOURCE_MUXED) #define __request_mem_region(start,n,name, excl) __request_region(&iomem_resource, (start), (n), (name), excl) #define request_mem_region(start,n,name) __request_region(&iomem_resource, (start), (n), (name), 0) #define request_mem_region_exclusive(start,n,name) \ __request_region(&iomem_resource, (start), (n), (name), IORESOURCE_EXCLUSIVE) #define rename_region(region, newname) do { (region)->name = (newname); } while (0) extern struct resource * __request_region(struct resource *, resource_size_t start, resource_size_t n, const char *name, int flags); /* Compatibility cruft */ #define release_region(start,n) __release_region(&ioport_resource, (start), (n)) #define release_mem_region(start,n) __release_region(&iomem_resource, (start), (n)) extern void __release_region(struct resource *, resource_size_t, resource_size_t); #ifdef CONFIG_MEMORY_HOTREMOVE extern void release_mem_region_adjustable(resource_size_t, resource_size_t); #endif #ifdef CONFIG_MEMORY_HOTPLUG extern void merge_system_ram_resource(struct resource *res); #endif /* Wrappers for managed devices */ struct device; extern int devm_request_resource(struct device *dev, struct resource *root, struct resource *new); extern void devm_release_resource(struct device *dev, struct resource *new); #define devm_request_region(dev,start,n,name) \ __devm_request_region(dev, &ioport_resource, (start), (n), (name)) #define devm_request_mem_region(dev,start,n,name) \ __devm_request_region(dev, &iomem_resource, (start), (n), (name)) extern struct resource * __devm_request_region(struct device *dev, struct resource *parent, resource_size_t start, resource_size_t n, const char *name); #define devm_release_region(dev, start, n) \ __devm_release_region(dev, &ioport_resource, (start), (n)) #define devm_release_mem_region(dev, start, n) \ __devm_release_region(dev, &iomem_resource, (start), (n)) extern void __devm_release_region(struct device *dev, struct resource *parent, resource_size_t start, resource_size_t n); extern int iomem_map_sanity_check(resource_size_t addr, unsigned long size); extern bool iomem_is_exclusive(u64 addr); extern int walk_system_ram_range(unsigned long start_pfn, unsigned long nr_pages, void *arg, int (*func)(unsigned long, unsigned long, void *)); extern int walk_mem_res(u64 start, u64 end, void *arg, int (*func)(struct resource *, void *)); extern int walk_system_ram_res(u64 start, u64 end, void *arg, int (*func)(struct resource *, void *)); extern int walk_iomem_res_desc(unsigned long desc, unsigned long flags, u64 start, u64 end, void *arg, int (*func)(struct resource *, void *)); /* True if any part of r1 overlaps r2 */ static inline bool resource_overlaps(struct resource *r1, struct resource *r2) { return (r1->start <= r2->end && r1->end >= r2->start); } struct resource *devm_request_free_mem_region(struct device *dev, struct resource *base, unsigned long size); struct resource *request_free_mem_region(struct resource *base, unsigned long size, const char *name); #ifdef CONFIG_IO_STRICT_DEVMEM void revoke_devmem(struct resource *res); #else static inline void revoke_devmem(struct resource *res) { }; #endif #endif /* __ASSEMBLY__ */ #endif /* _LINUX_IOPORT_H */
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 /* SPDX-License-Identifier: GPL-2.0 */ #ifndef BLK_MQ_SCHED_H #define BLK_MQ_SCHED_H #include "blk-mq.h" #include "blk-mq-tag.h" void blk_mq_sched_assign_ioc(struct request *rq); void blk_mq_sched_request_inserted(struct request *rq); bool blk_mq_sched_try_merge(struct request_queue *q, struct bio *bio, unsigned int nr_segs, struct request **merged_request); bool __blk_mq_sched_bio_merge(struct request_queue *q, struct bio *bio, unsigned int nr_segs); bool blk_mq_sched_try_insert_merge(struct request_queue *q, struct request *rq); void blk_mq_sched_mark_restart_hctx(struct blk_mq_hw_ctx *hctx); void blk_mq_sched_restart(struct blk_mq_hw_ctx *hctx); void blk_mq_sched_insert_request(struct request *rq, bool at_head, bool run_queue, bool async); void blk_mq_sched_insert_requests(struct blk_mq_hw_ctx *hctx, struct blk_mq_ctx *ctx, struct list_head *list, bool run_queue_async); void blk_mq_sched_dispatch_requests(struct blk_mq_hw_ctx *hctx); int blk_mq_init_sched(struct request_queue *q, struct elevator_type *e); void blk_mq_exit_sched(struct request_queue *q, struct elevator_queue *e); void blk_mq_sched_free_requests(struct request_queue *q); static inline bool blk_mq_sched_bio_merge(struct request_queue *q, struct bio *bio, unsigned int nr_segs) { if (blk_queue_nomerges(q) || !bio_mergeable(bio)) return false; return __blk_mq_sched_bio_merge(q, bio, nr_segs); } static inline bool blk_mq_sched_allow_merge(struct request_queue *q, struct request *rq, struct bio *bio) { struct elevator_queue *e = q->elevator; if (e && e->type->ops.allow_merge) return e->type->ops.allow_merge(q, rq, bio); return true; } static inline void blk_mq_sched_completed_request(struct request *rq, u64 now) { struct elevator_queue *e = rq->q->elevator; if (e && e->type->ops.completed_request) e->type->ops.completed_request(rq, now); } static inline void blk_mq_sched_requeue_request(struct request *rq) { struct request_queue *q = rq->q; struct elevator_queue *e = q->elevator; if ((rq->rq_flags & RQF_ELVPRIV) && e && e->type->ops.requeue_request) e->type->ops.requeue_request(rq); } static inline bool blk_mq_sched_has_work(struct blk_mq_hw_ctx *hctx) { struct elevator_queue *e = hctx->queue->elevator; if (e && e->type->ops.has_work) return e->type->ops.has_work(hctx); return false; } static inline bool blk_mq_sched_needs_restart(struct blk_mq_hw_ctx *hctx) { return test_bit(BLK_MQ_S_SCHED_RESTART, &hctx->state); } #endif
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 /* SPDX-License-Identifier: GPL-2.0 */ #ifndef _LINUX_HUGE_MM_H #define _LINUX_HUGE_MM_H #include <linux/sched/coredump.h> #include <linux/mm_types.h> #include <linux/fs.h> /* only for vma_is_dax() */ vm_fault_t do_huge_pmd_anonymous_page(struct vm_fault *vmf); int copy_huge_pmd(struct mm_struct *dst_mm, struct mm_struct *src_mm, pmd_t *dst_pmd, pmd_t *src_pmd, unsigned long addr, struct vm_area_struct *dst_vma, struct vm_area_struct *src_vma); void huge_pmd_set_accessed(struct vm_fault *vmf, pmd_t orig_pmd); int copy_huge_pud(struct mm_struct *dst_mm, struct mm_struct *src_mm, pud_t *dst_pud, pud_t *src_pud, unsigned long addr, struct vm_area_struct *vma); #ifdef CONFIG_HAVE_ARCH_TRANSPARENT_HUGEPAGE_PUD void huge_pud_set_accessed(struct vm_fault *vmf, pud_t orig_pud); #else static inline void huge_pud_set_accessed(struct vm_fault *vmf, pud_t orig_pud) { } #endif vm_fault_t do_huge_pmd_wp_page(struct vm_fault *vmf, pmd_t orig_pmd); struct page *follow_trans_huge_pmd(struct vm_area_struct *vma, unsigned long addr, pmd_t *pmd, unsigned int flags); bool madvise_free_huge_pmd(struct mmu_gather *tlb, struct vm_area_struct *vma, pmd_t *pmd, unsigned long addr, unsigned long next); int zap_huge_pmd(struct mmu_gather *tlb, struct vm_area_struct *vma, pmd_t *pmd, unsigned long addr); int zap_huge_pud(struct mmu_gather *tlb, struct vm_area_struct *vma, pud_t *pud, unsigned long addr); bool move_huge_pmd(struct vm_area_struct *vma, unsigned long old_addr, unsigned long new_addr, pmd_t *old_pmd, pmd_t *new_pmd); int change_huge_pmd(struct vm_area_struct *vma, pmd_t *pmd, unsigned long addr, pgprot_t newprot, unsigned long cp_flags); vm_fault_t vmf_insert_pfn_pmd_prot(struct vm_fault *vmf, pfn_t pfn, pgprot_t pgprot, bool write); /** * vmf_insert_pfn_pmd - insert a pmd size pfn * @vmf: Structure describing the fault * @pfn: pfn to insert * @pgprot: page protection to use * @write: whether it's a write fault * * Insert a pmd size pfn. See vmf_insert_pfn() for additional info. * * Return: vm_fault_t value. */ static inline vm_fault_t vmf_insert_pfn_pmd(struct vm_fault *vmf, pfn_t pfn, bool write) { return vmf_insert_pfn_pmd_prot(vmf, pfn, vmf->vma->vm_page_prot, write); } vm_fault_t vmf_insert_pfn_pud_prot(struct vm_fault *vmf, pfn_t pfn, pgprot_t pgprot, bool write); /** * vmf_insert_pfn_pud - insert a pud size pfn * @vmf: Structure describing the fault * @pfn: pfn to insert * @pgprot: page protection to use * @write: whether it's a write fault * * Insert a pud size pfn. See vmf_insert_pfn() for additional info. * * Return: vm_fault_t value. */ static inline vm_fault_t vmf_insert_pfn_pud(struct vm_fault *vmf, pfn_t pfn, bool write) { return vmf_insert_pfn_pud_prot(vmf, pfn, vmf->vma->vm_page_prot, write); } enum transparent_hugepage_flag { TRANSPARENT_HUGEPAGE_NEVER_DAX, TRANSPARENT_HUGEPAGE_FLAG, TRANSPARENT_HUGEPAGE_REQ_MADV_FLAG, TRANSPARENT_HUGEPAGE_DEFRAG_DIRECT_FLAG, TRANSPARENT_HUGEPAGE_DEFRAG_KSWAPD_FLAG, TRANSPARENT_HUGEPAGE_DEFRAG_KSWAPD_OR_MADV_FLAG, TRANSPARENT_HUGEPAGE_DEFRAG_REQ_MADV_FLAG, TRANSPARENT_HUGEPAGE_DEFRAG_KHUGEPAGED_FLAG, TRANSPARENT_HUGEPAGE_USE_ZERO_PAGE_FLAG, #ifdef CONFIG_DEBUG_VM TRANSPARENT_HUGEPAGE_DEBUG_COW_FLAG, #endif }; struct kobject; struct kobj_attribute; ssize_t single_hugepage_flag_store(struct kobject *kobj, struct kobj_attribute *attr, const char *buf, size_t count, enum transparent_hugepage_flag flag); ssize_t single_hugepage_flag_show(struct kobject *kobj, struct kobj_attribute *attr, char *buf, enum transparent_hugepage_flag flag); extern struct kobj_attribute shmem_enabled_attr; #define HPAGE_PMD_ORDER (HPAGE_PMD_SHIFT-PAGE_SHIFT) #define HPAGE_PMD_NR (1<<HPAGE_PMD_ORDER) #ifdef CONFIG_TRANSPARENT_HUGEPAGE #define HPAGE_PMD_SHIFT PMD_SHIFT #define HPAGE_PMD_SIZE ((1UL) << HPAGE_PMD_SHIFT) #define HPAGE_PMD_MASK (~(HPAGE_PMD_SIZE - 1)) #define HPAGE_PUD_SHIFT PUD_SHIFT #define HPAGE_PUD_SIZE ((1UL) << HPAGE_PUD_SHIFT) #define HPAGE_PUD_MASK (~(HPAGE_PUD_SIZE - 1)) extern unsigned long transparent_hugepage_flags; static inline bool transhuge_vma_suitable(struct vm_area_struct *vma, unsigned long haddr) { /* Don't have to check pgoff for anonymous vma */ if (!vma_is_anonymous(vma)) { if (!IS_ALIGNED((vma->vm_start >> PAGE_SHIFT) - vma->vm_pgoff, HPAGE_PMD_NR)) return false; } if (haddr < vma->vm_start || haddr + HPAGE_PMD_SIZE > vma->vm_end) return false; return true; } static inline bool transhuge_vma_enabled(struct vm_area_struct *vma, unsigned long vm_flags) { /* Explicitly disabled through madvise. */ if ((vm_flags & VM_NOHUGEPAGE) || test_bit(MMF_DISABLE_THP, &vma->vm_mm->flags)) return false; return true; } /* * to be used on vmas which are known to support THP. * Use transparent_hugepage_active otherwise */ static inline bool __transparent_hugepage_enabled(struct vm_area_struct *vma) { /* * If the hardware/firmware marked hugepage support disabled. */ if (transparent_hugepage_flags & (1 << TRANSPARENT_HUGEPAGE_NEVER_DAX)) return false; if (!transhuge_vma_enabled(vma, vma->vm_flags)) return false; if (vma_is_temporary_stack(vma)) return false; if (transparent_hugepage_flags & (1 << TRANSPARENT_HUGEPAGE_FLAG)) return true; if (vma_is_dax(vma)) return true; if (transparent_hugepage_flags & (1 << TRANSPARENT_HUGEPAGE_REQ_MADV_FLAG)) return !!(vma->vm_flags & VM_HUGEPAGE); return false; } bool transparent_hugepage_active(struct vm_area_struct *vma); #define transparent_hugepage_use_zero_page() \ (transparent_hugepage_flags & \ (1<<TRANSPARENT_HUGEPAGE_USE_ZERO_PAGE_FLAG)) unsigned long thp_get_unmapped_area(struct file *filp, unsigned long addr, unsigned long len, unsigned long pgoff, unsigned long flags); void prep_transhuge_page(struct page *page); void free_transhuge_page(struct page *page); bool is_transparent_hugepage(struct page *page); bool can_split_huge_page(struct page *page, int *pextra_pins); int split_huge_page_to_list(struct page *page, struct list_head *list); static inline int split_huge_page(struct page *page) { return split_huge_page_to_list(page, NULL); } void deferred_split_huge_page(struct page *page); void __split_huge_pmd(struct vm_area_struct *vma, pmd_t *pmd, unsigned long address, bool freeze, struct page *page); #define split_huge_pmd(__vma, __pmd, __address) \ do { \ pmd_t *____pmd = (__pmd); \ if (is_swap_pmd(*____pmd) || pmd_trans_huge(*____pmd) \ || pmd_devmap(*____pmd)) \ __split_huge_pmd(__vma, __pmd, __address, \ false, NULL); \ } while (0) void split_huge_pmd_address(struct vm_area_struct *vma, unsigned long address, bool freeze, struct page *page); void __split_huge_pud(struct vm_area_struct *vma, pud_t *pud, unsigned long address); #define split_huge_pud(__vma, __pud, __address) \ do { \ pud_t *____pud = (__pud); \ if (pud_trans_huge(*____pud) \ || pud_devmap(*____pud)) \ __split_huge_pud(__vma, __pud, __address); \ } while (0) int hugepage_madvise(struct vm_area_struct *vma, unsigned long *vm_flags, int advice); void vma_adjust_trans_huge(struct vm_area_struct *vma, unsigned long start, unsigned long end, long adjust_next); spinlock_t *__pmd_trans_huge_lock(pmd_t *pmd, struct vm_area_struct *vma); spinlock_t *__pud_trans_huge_lock(pud_t *pud, struct vm_area_struct *vma); static inline int is_swap_pmd(pmd_t pmd) { return !pmd_none(pmd) && !pmd_present(pmd); } /* mmap_lock must be held on entry */ static inline spinlock_t *pmd_trans_huge_lock(pmd_t *pmd, struct vm_area_struct *vma) { if (is_swap_pmd(*pmd) || pmd_trans_huge(*pmd) || pmd_devmap(*pmd)) return __pmd_trans_huge_lock(pmd, vma); else return NULL; } static inline spinlock_t *pud_trans_huge_lock(pud_t *pud, struct vm_area_struct *vma) { if (pud_trans_huge(*pud) || pud_devmap(*pud)) return __pud_trans_huge_lock(pud, vma); else return NULL; } /** * thp_head - Head page of a transparent huge page. * @page: Any page (tail, head or regular) found in the page cache. */ static inline struct page *thp_head(struct page *page) { return compound_head(page); } /** * thp_order - Order of a transparent huge page. * @page: Head page of a transparent huge page. */ static inline unsigned int thp_order(struct page *page) { VM_BUG_ON_PGFLAGS(PageTail(page), page); if (PageHead(page)) return HPAGE_PMD_ORDER; return 0; } /** * thp_nr_pages - The number of regular pages in this huge page. * @page: The head page of a huge page. */ static inline int thp_nr_pages(struct page *page) { VM_BUG_ON_PGFLAGS(PageTail(page), page); if (PageHead(page)) return HPAGE_PMD_NR; return 1; } struct page *follow_devmap_pmd(struct vm_area_struct *vma, unsigned long addr, pmd_t *pmd, int flags, struct dev_pagemap **pgmap); struct page *follow_devmap_pud(struct vm_area_struct *vma, unsigned long addr, pud_t *pud, int flags, struct dev_pagemap **pgmap); vm_fault_t do_huge_pmd_numa_page(struct vm_fault *vmf, pmd_t orig_pmd); extern struct page *huge_zero_page; extern unsigned long huge_zero_pfn; static inline bool is_huge_zero_page(struct page *page) { return READ_ONCE(huge_zero_page) == page; } static inline bool is_huge_zero_pmd(pmd_t pmd) { return READ_ONCE(huge_zero_pfn) == pmd_pfn(pmd) && pmd_present(pmd); } static inline bool is_huge_zero_pud(pud_t pud) { return false; } struct page *mm_get_huge_zero_page(struct mm_struct *mm); void mm_put_huge_zero_page(struct mm_struct *mm); #define mk_huge_pmd(page, prot) pmd_mkhuge(mk_pmd(page, prot)) static inline bool thp_migration_supported(void) { return IS_ENABLED(CONFIG_ARCH_ENABLE_THP_MIGRATION); } static inline struct list_head *page_deferred_list(struct page *page) { /* * Global or memcg deferred list in the second tail pages is * occupied by compound_head. */ return &page[2].deferred_list; } #else /* CONFIG_TRANSPARENT_HUGEPAGE */ #define HPAGE_PMD_SHIFT ({ BUILD_BUG(); 0; }) #define HPAGE_PMD_MASK ({ BUILD_BUG(); 0; }) #define HPAGE_PMD_SIZE ({ BUILD_BUG(); 0; }) #define HPAGE_PUD_SHIFT ({ BUILD_BUG(); 0; }) #define HPAGE_PUD_MASK ({ BUILD_BUG(); 0; }) #define HPAGE_PUD_SIZE ({ BUILD_BUG(); 0; }) static inline struct page *thp_head(struct page *page) { VM_BUG_ON_PGFLAGS(PageTail(page), page); return page; } static inline unsigned int thp_order(struct page *page) { VM_BUG_ON_PGFLAGS(PageTail(page), page); return 0; } static inline int thp_nr_pages(struct page *page) { VM_BUG_ON_PGFLAGS(PageTail(page), page); return 1; } static inline bool __transparent_hugepage_enabled(struct vm_area_struct *vma) { return false; } static inline bool transparent_hugepage_active(struct vm_area_struct *vma) { return false; } static inline bool transhuge_vma_suitable(struct vm_area_struct *vma, unsigned long haddr) { return false; } static inline bool transhuge_vma_enabled(struct vm_area_struct *vma, unsigned long vm_flags) { return false; } static inline void prep_transhuge_page(struct page *page) {} static inline bool is_transparent_hugepage(struct page *page) { return false; } #define transparent_hugepage_flags 0UL #define thp_get_unmapped_area NULL static inline bool can_split_huge_page(struct page *page, int *pextra_pins) { BUILD_BUG(); return false; } static inline int split_huge_page_to_list(struct page *page, struct list_head *list) { return 0; } static inline int split_huge_page(struct page *page) { return 0; } static inline void deferred_split_huge_page(struct page *page) {} #define split_huge_pmd(__vma, __pmd, __address) \ do { } while (0) static inline void __split_huge_pmd(struct vm_area_struct *vma, pmd_t *pmd, unsigned long address, bool freeze, struct page *page) {} static inline void split_huge_pmd_address(struct vm_area_struct *vma, unsigned long address, bool freeze, struct page *page) {} #define split_huge_pud(__vma, __pmd, __address) \ do { } while (0) static inline int hugepage_madvise(struct vm_area_struct *vma, unsigned long *vm_flags, int advice) { BUG(); return 0; } static inline void vma_adjust_trans_huge(struct vm_area_struct *vma, unsigned long start, unsigned long end, long adjust_next) { } static inline int is_swap_pmd(pmd_t pmd) { return 0; } static inline spinlock_t *pmd_trans_huge_lock(pmd_t *pmd, struct vm_area_struct *vma) { return NULL; } static inline spinlock_t *pud_trans_huge_lock(pud_t *pud, struct vm_area_struct *vma) { return NULL; } static inline vm_fault_t do_huge_pmd_numa_page(struct vm_fault *vmf, pmd_t orig_pmd) { return 0; } static inline bool is_huge_zero_page(struct page *page) { return false; } static inline bool is_huge_zero_pmd(pmd_t pmd) { return false; } static inline bool is_huge_zero_pud(pud_t pud) { return false; } static inline void mm_put_huge_zero_page(struct mm_struct *mm) { return; } static inline struct page *follow_devmap_pmd(struct vm_area_struct *vma, unsigned long addr, pmd_t *pmd, int flags, struct dev_pagemap **pgmap) { return NULL; } static inline struct page *follow_devmap_pud(struct vm_area_struct *vma, unsigned long addr, pud_t *pud, int flags, struct dev_pagemap **pgmap) { return NULL; } static inline bool thp_migration_supported(void) { return false; } #endif /* CONFIG_TRANSPARENT_HUGEPAGE */ /** * thp_size - Size of a transparent huge page. * @page: Head page of a transparent huge page. * * Return: Number of bytes in this page. */ static inline unsigned long thp_size(struct page *page) { return PAGE_SIZE << thp_order(page); } #endif /* _LINUX_HUGE_MM_H */
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946 947 948 949 950 951 952 953 954 955 956 957 958 959 960 961 962 963 964 965 966 967 968 969 970 971 972 973 974 975 976 977 978 979 980 981 982 983 984 985 986 987 988 989 990 991 992 993 994 995 996 997 998 999 1000 1001 1002 1003 1004 1005 1006 1007 1008 1009 1010 1011 1012 1013 1014 1015 1016 1017 1018 1019 1020 1021 1022 1023 1024 1025 1026 1027 1028 1029 1030 1031 1032 1033 1034 1035 1036 1037 1038 1039 1040 1041 1042 1043 1044 1045 1046 1047 1048 1049 1050 1051 1052 1053 1054 1055 1056 1057 1058 1059 1060 1061 1062 1063 1064 1065 1066 1067 1068 1069 1070 1071 1072 1073 1074 1075 1076 1077 1078 1079 1080 1081 1082 1083 1084 1085 1086 1087 1088 1089 1090 1091 1092 1093 1094 1095 1096 1097 1098 1099 1100 1101 1102 1103 1104 1105 1106 1107 1108 1109 1110 1111 1112 1113 1114 1115 1116 1117 1118 1119 1120 1121 1122 1123 1124 1125 1126 1127 1128 1129 1130 1131 1132 1133 1134 1135 1136 1137 1138 1139 1140 1141 1142 1143 1144 1145 1146 1147 1148 1149 1150 1151 1152 1153 1154 1155 1156 1157 1158 1159 1160 1161 1162 1163 1164 1165 1166 1167 1168 1169 1170 1171 1172 1173 1174 1175 1176 1177 1178 1179 1180 1181 1182 1183 1184 1185 1186 1187 1188 1189 1190 1191 1192 1193 1194 1195 1196 1197 1198 1199 1200 1201 1202 1203 1204 1205 1206 1207 1208 1209 1210 1211 1212 1213 1214 1215 1216 1217 1218 1219 1220 1221 1222 1223 1224 1225 1226 1227 1228 1229 1230 1231 1232 1233 1234 1235 1236 1237 1238 1239 1240 1241 1242 1243 1244 1245 1246 1247 1248 1249 1250 1251 1252 1253 1254 1255 1256 1257 1258 1259 1260 1261 1262 1263 1264 1265 1266 1267 1268 1269 1270 1271 1272 1273 1274 1275 1276 1277 1278 1279 1280 1281 1282 1283 1284 1285 1286 1287 1288 1289 1290 1291 1292 1293 1294 1295 1296 1297 1298 1299 1300 1301 1302 1303 1304 1305 1306 1307 1308 1309 1310 1311 1312 1313 1314 1315 1316 1317 1318 1319 1320 1321 1322 1323 1324 1325 1326 1327 1328 1329 1330 1331 1332 1333 1334 1335 1336 1337 1338 1339 1340 1341 1342 1343 1344 1345 1346 1347 1348 1349 1350 1351 1352 1353 1354 1355 1356 1357 1358 1359 1360 1361 1362 1363 1364 1365 1366 1367 1368 1369 1370 1371 1372 1373 1374 1375 1376 1377 1378 1379 1380 1381 1382 1383 1384 1385 1386 1387 1388 1389 1390 1391 1392 1393 1394 1395 1396 1397 1398 1399 1400 1401 1402 1403 1404 1405 1406 1407 1408 1409 1410 1411 1412 1413 1414 1415 1416 1417 1418 1419 1420 1421 1422 1423 1424 1425 1426 1427 1428 1429 1430 1431 1432 1433 1434 1435 1436 1437 1438 1439 1440 1441 1442 1443 1444 1445 1446 1447 1448 1449 1450 1451 1452 1453 1454 1455 1456 1457 1458 1459 1460 1461 1462 1463 1464 1465 1466 1467 1468 1469 1470 1471 1472 1473 1474 1475 1476 1477 1478 1479 1480 1481 1482 1483 1484 1485 1486 1487 1488 1489 1490 1491 1492 1493 1494 1495 1496 1497 1498 1499 1500 1501 1502 1503 1504 1505 1506 1507 1508 1509 1510 1511 1512 1513 1514 1515 1516 1517 1518 1519 1520 1521 1522 1523 1524 1525 1526 1527 1528 1529 1530 1531 1532 1533 1534 1535 1536 1537 1538 1539 1540 1541 1542 1543 1544 1545 1546 1547 1548 1549 1550 1551 1552 1553 1554 1555 1556 1557 1558 1559 1560 1561 1562 1563 1564 1565 1566 1567 1568 1569 1570 1571 1572 1573 1574 1575 1576 1577 1578 1579 1580 1581 1582 1583 1584 1585 1586 1587 1588 1589 1590 1591 1592 1593 1594 1595 1596 1597 1598 1599 1600 1601 1602 1603 1604 1605 1606 1607 1608 1609 1610 1611 1612 1613 1614 1615 1616 1617 1618 1619 1620 1621 1622 1623 1624 1625 1626 1627 1628 1629 1630 1631 1632 1633 1634 1635 1636 1637 1638 1639 1640 1641 1642 1643 1644 1645 1646 1647 1648 1649 1650 1651 1652 1653 1654 1655 1656 1657 1658 1659 1660 1661 1662 1663 1664 1665 1666 1667 1668 1669 1670 1671 1672 1673 1674 1675 1676 1677 1678 1679 1680 1681 1682 1683 1684 1685 1686 1687 1688 1689 1690 1691 1692 1693 1694 1695 1696 1697 1698 1699 1700 1701 1702 1703 1704 1705 1706 1707 1708 1709 1710 1711 1712 1713 1714 1715 1716 1717 1718 1719 1720 1721 1722 1723 1724 1725 1726 1727 1728 1729 1730 1731 1732 1733 1734 1735 1736 1737 1738 1739 1740 1741 1742 1743 1744 1745 1746 1747 1748 1749 1750 1751 1752 1753 1754 1755 1756 1757 1758 1759 1760 1761 1762 1763 1764 1765 1766 1767 1768 1769 1770 1771 1772 1773 1774 1775 1776 1777 1778 1779 1780 1781 1782 1783 1784 1785 1786 1787 1788 1789 1790 1791 1792 1793 1794 1795 1796 1797 1798 1799 1800 1801 1802 1803 1804 1805 1806 1807 1808 1809 1810 1811 1812 1813 1814 1815 1816 1817 1818 1819 1820 1821 1822 1823 1824 1825 1826 1827 1828 1829 1830 1831 1832 1833 1834 1835 1836 1837 1838 1839 1840 1841 1842 1843 1844 1845 1846 1847 1848 1849 1850 1851 1852 1853 1854 1855 1856 1857 1858 1859 1860 1861 1862 1863 1864 1865 1866 1867 1868 1869 1870 1871 1872 1873 1874 1875 1876 1877 1878 1879 1880 1881 1882 1883 1884 1885 1886 1887 1888 1889 1890 1891 1892 1893 1894 1895 1896 1897 1898 1899 1900 1901 1902 1903 1904 1905 1906 1907 1908 1909 1910 1911 1912 1913 1914 1915 1916 1917 1918 1919 1920 1921 1922 1923 1924 1925 1926 1927 1928 1929 1930 1931 1932 1933 1934 1935 1936 1937 1938 1939 1940 1941 1942 1943 1944 1945 1946 1947 1948 1949 1950 1951 1952 1953 1954 1955 1956 1957 1958 1959 1960 1961 1962 1963 1964 1965 1966 1967 1968 1969 1970 1971 1972 1973 1974 1975 1976 1977 1978 1979 1980 1981 1982 1983 1984 1985 1986 1987 1988 1989 1990 1991 1992 1993 1994 1995 1996 1997 1998 1999 2000 2001 2002 2003 2004 2005 2006 2007 2008 2009 2010 2011 2012 2013 2014 2015 2016 2017 2018 2019 2020 2021 2022 2023 2024 2025 2026 2027 2028 2029 2030 2031 2032 2033 2034 2035 2036 2037 2038 2039 2040 2041 2042 2043 2044 2045 2046 2047 2048 2049 2050 2051 2052 2053 2054 2055 2056 2057 2058 2059 2060 2061 2062 2063 2064 2065 2066 2067 2068 2069 2070 2071 2072 2073 2074 2075 2076 2077 2078 2079 2080 2081 2082 2083 2084 2085 2086 2087 2088 2089 2090 2091 2092 2093 2094 2095 2096 2097 2098 2099 2100 2101 2102 2103 2104 2105 2106 2107 2108 2109 2110 2111 2112 2113 2114 2115 2116 2117 2118 2119 2120 2121 2122 // SPDX-License-Identifier: GPL-2.0 #ifndef _LINUX_KERNEL_TRACE_H #define _LINUX_KERNEL_TRACE_H #include <linux/fs.h> #include <linux/atomic.h> #include <linux/sched.h> #include <linux/clocksource.h> #include <linux/ring_buffer.h> #include <linux/mmiotrace.h> #include <linux/tracepoint.h> #include <linux/ftrace.h> #include <linux/trace.h> #include <linux/hw_breakpoint.h> #include <linux/trace_seq.h> #include <linux/trace_events.h> #include <linux/compiler.h> #include <linux/glob.h> #include <linux/irq_work.h> #include <linux/workqueue.h> #include <linux/ctype.h> #ifdef CONFIG_FTRACE_SYSCALLS #include <asm/unistd.h> /* For NR_SYSCALLS */ #include <asm/syscall.h> /* some archs define it here */ #endif enum trace_type { __TRACE_FIRST_TYPE = 0, TRACE_FN, TRACE_CTX, TRACE_WAKE, TRACE_STACK, TRACE_PRINT, TRACE_BPRINT, TRACE_MMIO_RW, TRACE_MMIO_MAP, TRACE_BRANCH, TRACE_GRAPH_RET, TRACE_GRAPH_ENT, TRACE_USER_STACK, TRACE_BLK, TRACE_BPUTS, TRACE_HWLAT, TRACE_RAW_DATA, __TRACE_LAST_TYPE, }; #undef __field #define __field(type, item) type item; #undef __field_fn #define __field_fn(type, item) type item; #undef __field_struct #define __field_struct(type, item) __field(type, item) #undef __field_desc #define __field_desc(type, container, item) #undef __field_packed #define __field_packed(type, container, item) #undef __array #define __array(type, item, size) type item[size]; #undef __array_desc #define __array_desc(type, container, item, size) #undef __dynamic_array #define __dynamic_array(type, item) type item[]; #undef F_STRUCT #define F_STRUCT(args...) args #undef FTRACE_ENTRY #define FTRACE_ENTRY(name, struct_name, id, tstruct, print) \ struct struct_name { \ struct trace_entry ent; \ tstruct \ } #undef FTRACE_ENTRY_DUP #define FTRACE_ENTRY_DUP(name, name_struct, id, tstruct, printk) #undef FTRACE_ENTRY_REG #define FTRACE_ENTRY_REG(name, struct_name, id, tstruct, print, regfn) \ FTRACE_ENTRY(name, struct_name, id, PARAMS(tstruct), PARAMS(print)) #undef FTRACE_ENTRY_PACKED #define FTRACE_ENTRY_PACKED(name, struct_name, id, tstruct, print) \ FTRACE_ENTRY(name, struct_name, id, PARAMS(tstruct), PARAMS(print)) __packed #include "trace_entries.h" /* Use this for memory failure errors */ #define MEM_FAIL(condition, fmt, ...) ({ \ static bool __section(".data.once") __warned; \ int __ret_warn_once = !!(condition); \ \ if (unlikely(__ret_warn_once && !__warned)) { \ __warned = true; \ pr_err("ERROR: " fmt, ##__VA_ARGS__); \ } \ unlikely(__ret_warn_once); \ }) /* * syscalls are special, and need special handling, this is why * they are not included in trace_entries.h */ struct syscall_trace_enter { struct trace_entry ent; int nr; unsigned long args[]; }; struct syscall_trace_exit { struct trace_entry ent; int nr; long ret; }; struct kprobe_trace_entry_head { struct trace_entry ent; unsigned long ip; }; struct kretprobe_trace_entry_head { struct trace_entry ent; unsigned long func; unsigned long ret_ip; }; /* * trace_flag_type is an enumeration that holds different * states when a trace occurs. These are: * IRQS_OFF - interrupts were disabled * IRQS_NOSUPPORT - arch does not support irqs_disabled_flags * NEED_RESCHED - reschedule is requested * HARDIRQ - inside an interrupt handler * SOFTIRQ - inside a softirq handler */ enum trace_flag_type { TRACE_FLAG_IRQS_OFF = 0x01, TRACE_FLAG_IRQS_NOSUPPORT = 0x02, TRACE_FLAG_NEED_RESCHED = 0x04, TRACE_FLAG_HARDIRQ = 0x08, TRACE_FLAG_SOFTIRQ = 0x10, TRACE_FLAG_PREEMPT_RESCHED = 0x20, TRACE_FLAG_NMI = 0x40, }; #define TRACE_BUF_SIZE 1024 struct trace_array; /* * The CPU trace array - it consists of thousands of trace entries * plus some other descriptor data: (for example which task started * the trace, etc.) */ struct trace_array_cpu { atomic_t disabled; void *buffer_page; /* ring buffer spare */ unsigned long entries; unsigned long saved_latency; unsigned long critical_start; unsigned long critical_end; unsigned long critical_sequence; unsigned long nice; unsigned long policy; unsigned long rt_priority; unsigned long skipped_entries; u64 preempt_timestamp; pid_t pid; kuid_t uid; char comm[TASK_COMM_LEN]; #ifdef CONFIG_FUNCTION_TRACER int ftrace_ignore_pid; #endif bool ignore_pid; }; struct tracer; struct trace_option_dentry; struct array_buffer { struct trace_array *tr; struct trace_buffer *buffer; struct trace_array_cpu __percpu *data; u64 time_start; int cpu; }; #define TRACE_FLAGS_MAX_SIZE 32 struct trace_options { struct tracer *tracer; struct trace_option_dentry *topts; }; struct trace_pid_list { int pid_max; unsigned long *pids; }; enum { TRACE_PIDS = BIT(0), TRACE_NO_PIDS = BIT(1), }; static inline bool pid_type_enabled(int type, struct trace_pid_list *pid_list, struct trace_pid_list *no_pid_list) { /* Return true if the pid list in type has pids */ return ((type & TRACE_PIDS) && pid_list) || ((type & TRACE_NO_PIDS) && no_pid_list); } static inline bool still_need_pid_events(int type, struct trace_pid_list *pid_list, struct trace_pid_list *no_pid_list) { /* * Turning off what is in @type, return true if the "other" * pid list, still has pids in it. */ return (!(type & TRACE_PIDS) && pid_list) || (!(type & TRACE_NO_PIDS) && no_pid_list); } typedef bool (*cond_update_fn_t)(struct trace_array *tr, void *cond_data); /** * struct cond_snapshot - conditional snapshot data and callback * * The cond_snapshot structure encapsulates a callback function and * data associated with the snapshot for a given tracing instance. * * When a snapshot is taken conditionally, by invoking * tracing_snapshot_cond(tr, cond_data), the cond_data passed in is * passed in turn to the cond_snapshot.update() function. That data * can be compared by the update() implementation with the cond_data * contained within the struct cond_snapshot instance associated with * the trace_array. Because the tr->max_lock is held throughout the * update() call, the update() function can directly retrieve the * cond_snapshot and cond_data associated with the per-instance * snapshot associated with the trace_array. * * The cond_snapshot.update() implementation can save data to be * associated with the snapshot if it decides to, and returns 'true' * in that case, or it returns 'false' if the conditional snapshot * shouldn't be taken. * * The cond_snapshot instance is created and associated with the * user-defined cond_data by tracing_cond_snapshot_enable(). * Likewise, the cond_snapshot instance is destroyed and is no longer * associated with the trace instance by * tracing_cond_snapshot_disable(). * * The method below is required. * * @update: When a conditional snapshot is invoked, the update() * callback function is invoked with the tr->max_lock held. The * update() implementation signals whether or not to actually * take the snapshot, by returning 'true' if so, 'false' if no * snapshot should be taken. Because the max_lock is held for * the duration of update(), the implementation is safe to * directly retrieved and save any implementation data it needs * to in association with the snapshot. */ struct cond_snapshot { void *cond_data; cond_update_fn_t update; }; /* * The trace array - an array of per-CPU trace arrays. This is the * highest level data structure that individual tracers deal with. * They have on/off state as well: */ struct trace_array { struct list_head list; char *name; struct array_buffer array_buffer; #ifdef CONFIG_TRACER_MAX_TRACE /* * The max_buffer is used to snapshot the trace when a maximum * latency is reached, or when the user initiates a snapshot. * Some tracers will use this to store a maximum trace while * it continues examining live traces. * * The buffers for the max_buffer are set up the same as the array_buffer * When a snapshot is taken, the buffer of the max_buffer is swapped * with the buffer of the array_buffer and the buffers are reset for * the array_buffer so the tracing can continue. */ struct array_buffer max_buffer; bool allocated_snapshot; #endif #if defined(CONFIG_TRACER_MAX_TRACE) || defined(CONFIG_HWLAT_TRACER) unsigned long max_latency; #ifdef CONFIG_FSNOTIFY struct dentry *d_max_latency; struct work_struct fsnotify_work; struct irq_work fsnotify_irqwork; #endif #endif struct trace_pid_list __rcu *filtered_pids; struct trace_pid_list __rcu *filtered_no_pids; /* * max_lock is used to protect the swapping of buffers * when taking a max snapshot. The buffers themselves are * protected by per_cpu spinlocks. But the action of the swap * needs its own lock. * * This is defined as a arch_spinlock_t in order to help * with performance when lockdep debugging is enabled. * * It is also used in other places outside the update_max_tr * so it needs to be defined outside of the * CONFIG_TRACER_MAX_TRACE. */ arch_spinlock_t max_lock; int buffer_disabled; #ifdef CONFIG_FTRACE_SYSCALLS int sys_refcount_enter; int sys_refcount_exit; struct trace_event_file __rcu *enter_syscall_files[NR_syscalls]; struct trace_event_file __rcu *exit_syscall_files[NR_syscalls]; #endif int stop_count; int clock_id; int nr_topts; bool clear_trace; int buffer_percent; unsigned int n_err_log_entries; struct tracer *current_trace; unsigned int trace_flags; unsigned char trace_flags_index[TRACE_FLAGS_MAX_SIZE]; unsigned int flags; raw_spinlock_t start_lock; struct list_head err_log; struct dentry *dir; struct dentry *options; struct dentry *percpu_dir; struct dentry *event_dir; struct trace_options *topts; struct list_head systems; struct list_head events; struct trace_event_file *trace_marker_file; cpumask_var_t tracing_cpumask; /* only trace on set CPUs */ int ref; int trace_ref; #ifdef CONFIG_FUNCTION_TRACER struct ftrace_ops *ops; struct trace_pid_list __rcu *function_pids; struct trace_pid_list __rcu *function_no_pids; #ifdef CONFIG_DYNAMIC_FTRACE /* All of these are protected by the ftrace_lock */ struct list_head func_probes; struct list_head mod_trace; struct list_head mod_notrace; #endif /* function tracing enabled */ int function_enabled; #endif int time_stamp_abs_ref; struct list_head hist_vars; #ifdef CONFIG_TRACER_SNAPSHOT struct cond_snapshot *cond_snapshot; #endif }; enum { TRACE_ARRAY_FL_GLOBAL = (1 << 0) }; extern struct list_head ftrace_trace_arrays; extern struct mutex trace_types_lock; extern int trace_array_get(struct trace_array *tr); extern int tracing_check_open_get_tr(struct trace_array *tr); extern struct trace_array *trace_array_find(const char *instance); extern struct trace_array *trace_array_find_get(const char *instance); extern int tracing_set_time_stamp_abs(struct trace_array *tr, bool abs); extern int tracing_set_clock(struct trace_array *tr, const char *clockstr); extern bool trace_clock_in_ns(struct trace_array *tr); /* * The global tracer (top) should be the first trace array added, * but we check the flag anyway. */ static inline struct trace_array *top_trace_array(void) { struct trace_array *tr; if (list_empty(&ftrace_trace_arrays)) return NULL; tr = list_entry(ftrace_trace_arrays.prev, typeof(*tr), list); WARN_ON(!(tr->flags & TRACE_ARRAY_FL_GLOBAL)); return tr; } #define FTRACE_CMP_TYPE(var, type) \ __builtin_types_compatible_p(typeof(var), type *) #undef IF_ASSIGN #define IF_ASSIGN(var, entry, etype, id) \ if (FTRACE_CMP_TYPE(var, etype)) { \ var = (typeof(var))(entry); \ WARN_ON(id != 0 && (entry)->type != id); \ break; \ } /* Will cause compile errors if type is not found. */ extern void __ftrace_bad_type(void); /* * The trace_assign_type is a verifier that the entry type is * the same as the type being assigned. To add new types simply * add a line with the following format: * * IF_ASSIGN(var, ent, type, id); * * Where "type" is the trace type that includes the trace_entry * as the "ent" item. And "id" is the trace identifier that is * used in the trace_type enum. * * If the type can have more than one id, then use zero. */ #define trace_assign_type(var, ent) \ do { \ IF_ASSIGN(var, ent, struct ftrace_entry, TRACE_FN); \ IF_ASSIGN(var, ent, struct ctx_switch_entry, 0); \ IF_ASSIGN(var, ent, struct stack_entry, TRACE_STACK); \ IF_ASSIGN(var, ent, struct userstack_entry, TRACE_USER_STACK);\ IF_ASSIGN(var, ent, struct print_entry, TRACE_PRINT); \ IF_ASSIGN(var, ent, struct bprint_entry, TRACE_BPRINT); \ IF_ASSIGN(var, ent, struct bputs_entry, TRACE_BPUTS); \ IF_ASSIGN(var, ent, struct hwlat_entry, TRACE_HWLAT); \ IF_ASSIGN(var, ent, struct raw_data_entry, TRACE_RAW_DATA);\ IF_ASSIGN(var, ent, struct trace_mmiotrace_rw, \ TRACE_MMIO_RW); \ IF_ASSIGN(var, ent, struct trace_mmiotrace_map, \ TRACE_MMIO_MAP); \ IF_ASSIGN(var, ent, struct trace_branch, TRACE_BRANCH); \ IF_ASSIGN(var, ent, struct ftrace_graph_ent_entry, \ TRACE_GRAPH_ENT); \ IF_ASSIGN(var, ent, struct ftrace_graph_ret_entry, \ TRACE_GRAPH_RET); \ __ftrace_bad_type(); \ } while (0) /* * An option specific to a tracer. This is a boolean value. * The bit is the bit index that sets its value on the * flags value in struct tracer_flags. */ struct tracer_opt { const char *name; /* Will appear on the trace_options file */ u32 bit; /* Mask assigned in val field in tracer_flags */ }; /* * The set of specific options for a tracer. Your tracer * have to set the initial value of the flags val. */ struct tracer_flags { u32 val; struct tracer_opt *opts; struct tracer *trace; }; /* Makes more easy to define a tracer opt */ #define TRACER_OPT(s, b) .name = #s, .bit = b struct trace_option_dentry { struct tracer_opt *opt; struct tracer_flags *flags; struct trace_array *tr; struct dentry *entry; }; /** * struct tracer - a specific tracer and its callbacks to interact with tracefs * @name: the name chosen to select it on the available_tracers file * @init: called when one switches to this tracer (echo name > current_tracer) * @reset: called when one switches to another tracer * @start: called when tracing is unpaused (echo 1 > tracing_on) * @stop: called when tracing is paused (echo 0 > tracing_on) * @update_thresh: called when tracing_thresh is updated * @open: called when the trace file is opened * @pipe_open: called when the trace_pipe file is opened * @close: called when the trace file is released * @pipe_close: called when the trace_pipe file is released * @read: override the default read callback on trace_pipe * @splice_read: override the default splice_read callback on trace_pipe * @selftest: selftest to run on boot (see trace_selftest.c) * @print_headers: override the first lines that describe your columns * @print_line: callback that prints a trace * @set_flag: signals one of your private flags changed (trace_options file) * @flags: your private flags */ struct tracer { const char *name; int (*init)(struct trace_array *tr); void (*reset)(struct trace_array *tr); void (*start)(struct trace_array *tr); void (*stop)(struct trace_array *tr); int (*update_thresh)(struct trace_array *tr); void (*open)(struct trace_iterator *iter); void (*pipe_open)(struct trace_iterator *iter); void (*close)(struct trace_iterator *iter); void (*pipe_close)(struct trace_iterator *iter); ssize_t (*read)(struct trace_iterator *iter, struct file *filp, char __user *ubuf, size_t cnt, loff_t *ppos); ssize_t (*splice_read)(struct trace_iterator *iter, struct file *filp, loff_t *ppos, struct pipe_inode_info *pipe, size_t len, unsigned int flags); #ifdef CONFIG_FTRACE_STARTUP_TEST int (*selftest)(struct tracer *trace, struct trace_array *tr); #endif void (*print_header)(struct seq_file *m); enum print_line_t (*print_line)(struct trace_iterator *iter); /* If you handled the flag setting, return 0 */ int (*set_flag)(struct trace_array *tr, u32 old_flags, u32 bit, int set); /* Return 0 if OK with change, else return non-zero */ int (*flag_changed)(struct trace_array *tr, u32 mask, int set); struct tracer *next; struct tracer_flags *flags; int enabled; bool print_max; bool allow_instances; #ifdef CONFIG_TRACER_MAX_TRACE bool use_max_tr; #endif /* True if tracer cannot be enabled in kernel param */ bool noboot; }; /* Only current can touch trace_recursion */ /* * For function tracing recursion: * The order of these bits are important. * * When function tracing occurs, the following steps are made: * If arch does not support a ftrace feature: * call internal function (uses INTERNAL bits) which calls... * If callback is registered to the "global" list, the list * function is called and recursion checks the GLOBAL bits. * then this function calls... * The function callback, which can use the FTRACE bits to * check for recursion. */ enum { /* Function recursion bits */ TRACE_FTRACE_BIT, TRACE_FTRACE_NMI_BIT, TRACE_FTRACE_IRQ_BIT, TRACE_FTRACE_SIRQ_BIT, TRACE_FTRACE_TRANSITION_BIT, /* Internal use recursion bits */ TRACE_INTERNAL_BIT, TRACE_INTERNAL_NMI_BIT, TRACE_INTERNAL_IRQ_BIT, TRACE_INTERNAL_SIRQ_BIT, TRACE_INTERNAL_TRANSITION_BIT, TRACE_BRANCH_BIT, /* * Abuse of the trace_recursion. * As we need a way to maintain state if we are tracing the function * graph in irq because we want to trace a particular function that * was called in irq context but we have irq tracing off. Since this * can only be modified by current, we can reuse trace_recursion. */ TRACE_IRQ_BIT, /* Set if the function is in the set_graph_function file */ TRACE_GRAPH_BIT, /* * In the very unlikely case that an interrupt came in * at a start of graph tracing, and we want to trace * the function in that interrupt, the depth can be greater * than zero, because of the preempted start of a previous * trace. In an even more unlikely case, depth could be 2 * if a softirq interrupted the start of graph tracing, * followed by an interrupt preempting a start of graph * tracing in the softirq, and depth can even be 3 * if an NMI came in at the start of an interrupt function * that preempted a softirq start of a function that * preempted normal context!!!! Luckily, it can't be * greater than 3, so the next two bits are a mask * of what the depth is when we set TRACE_GRAPH_BIT */ TRACE_GRAPH_DEPTH_START_BIT, TRACE_GRAPH_DEPTH_END_BIT, /* * To implement set_graph_notrace, if this bit is set, we ignore * function graph tracing of called functions, until the return * function is called to clear it. */ TRACE_GRAPH_NOTRACE_BIT, }; #define trace_recursion_set(bit) do { (current)->trace_recursion |= (1<<(bit)); } while (0) #define trace_recursion_clear(bit) do { (current)->trace_recursion &= ~(1<<(bit)); } while (0) #define trace_recursion_test(bit) ((current)->trace_recursion & (1<<(bit))) #define trace_recursion_depth() \ (((current)->trace_recursion >> TRACE_GRAPH_DEPTH_START_BIT) & 3) #define trace_recursion_set_depth(depth) \ do { \ current->trace_recursion &= \ ~(3 << TRACE_GRAPH_DEPTH_START_BIT); \ current->trace_recursion |= \ ((depth) & 3) << TRACE_GRAPH_DEPTH_START_BIT; \ } while (0) #define TRACE_CONTEXT_BITS 4 #define TRACE_FTRACE_START TRACE_FTRACE_BIT #define TRACE_LIST_START TRACE_INTERNAL_BIT #define TRACE_CONTEXT_MASK ((1 << (TRACE_LIST_START + TRACE_CONTEXT_BITS)) - 1) enum { TRACE_CTX_NMI, TRACE_CTX_IRQ, TRACE_CTX_SOFTIRQ, TRACE_CTX_NORMAL, TRACE_CTX_TRANSITION, }; static __always_inline int trace_get_context_bit(void) { int bit; if (in_interrupt()) { if (in_nmi()) bit = TRACE_CTX_NMI; else if (in_irq()) bit = TRACE_CTX_IRQ; else bit = TRACE_CTX_SOFTIRQ; } else bit = TRACE_CTX_NORMAL; return bit; } static __always_inline int trace_test_and_set_recursion(int start) { unsigned int val = current->trace_recursion; int bit; bit = trace_get_context_bit() + start; if (unlikely(val & (1 << bit))) { /* * It could be that preempt_count has not been updated during * a switch between contexts. Allow for a single recursion. */ bit = start + TRACE_CTX_TRANSITION; if (trace_recursion_test(bit)) return -1; trace_recursion_set(bit); barrier(); return bit; } val |= 1 << bit; current->trace_recursion = val; barrier(); return bit; } static __always_inline void trace_clear_recursion(int bit) { unsigned int val = current->trace_recursion; bit = 1 << bit; val &= ~bit; barrier(); current->trace_recursion = val; } static inline struct ring_buffer_iter * trace_buffer_iter(struct trace_iterator *iter, int cpu) { return iter->buffer_iter ? iter->buffer_iter[cpu] : NULL; } int tracer_init(struct tracer *t, struct trace_array *tr); int tracing_is_enabled(void); void tracing_reset_online_cpus(struct array_buffer *buf); void tracing_reset_current(int cpu); void tracing_reset_all_online_cpus(void); int tracing_open_generic(struct inode *inode, struct file *filp); int tracing_open_generic_tr(struct inode *inode, struct file *filp); bool tracing_is_disabled(void); bool tracer_tracing_is_on(struct trace_array *tr); void tracer_tracing_on(struct trace_array *tr); void tracer_tracing_off(struct trace_array *tr); struct dentry *trace_create_file(const char *name, umode_t mode, struct dentry *parent, void *data, const struct file_operations *fops); int tracing_init_dentry(void); struct ring_buffer_event; struct ring_buffer_event * trace_buffer_lock_reserve(struct trace_buffer *buffer, int type, unsigned long len, unsigned long flags, int pc); struct trace_entry *tracing_get_trace_entry(struct trace_array *tr, struct trace_array_cpu *data); struct trace_entry *trace_find_next_entry(struct trace_iterator *iter, int *ent_cpu, u64 *ent_ts); void trace_buffer_unlock_commit_nostack(struct trace_buffer *buffer, struct ring_buffer_event *event); int trace_empty(struct trace_iterator *iter); void *trace_find_next_entry_inc(struct trace_iterator *iter); void trace_init_global_iter(struct trace_iterator *iter); void tracing_iter_reset(struct trace_iterator *iter, int cpu); unsigned long trace_total_entries_cpu(struct trace_array *tr, int cpu); unsigned long trace_total_entries(struct trace_array *tr); void trace_function(struct trace_array *tr, unsigned long ip, unsigned long parent_ip, unsigned long flags, int pc); void trace_graph_function(struct trace_array *tr, unsigned long ip, unsigned long parent_ip, unsigned long flags, int pc); void trace_latency_header(struct seq_file *m); void trace_default_header(struct seq_file *m); void print_trace_header(struct seq_file *m, struct trace_iterator *iter); int trace_empty(struct trace_iterator *iter); void trace_graph_return(struct ftrace_graph_ret *trace); int trace_graph_entry(struct ftrace_graph_ent *trace); void set_graph_array(struct trace_array *tr); void tracing_start_cmdline_record(void); void tracing_stop_cmdline_record(void); void tracing_start_tgid_record(void); void tracing_stop_tgid_record(void); int register_tracer(struct tracer *type); int is_tracing_stopped(void); loff_t tracing_lseek(struct file *file, loff_t offset, int whence); extern cpumask_var_t __read_mostly tracing_buffer_mask; #define for_each_tracing_cpu(cpu) \ for_each_cpu(cpu, tracing_buffer_mask) extern unsigned long nsecs_to_usecs(unsigned long nsecs); extern unsigned long tracing_thresh; /* PID filtering */ extern int pid_max; bool trace_find_filtered_pid(struct trace_pid_list *filtered_pids, pid_t search_pid); bool trace_ignore_this_task(struct trace_pid_list *filtered_pids, struct trace_pid_list *filtered_no_pids, struct task_struct *task); void trace_filter_add_remove_task(struct trace_pid_list *pid_list, struct task_struct *self, struct task_struct *task); void *trace_pid_next(struct trace_pid_list *pid_list, void *v, loff_t *pos); void *trace_pid_start(struct trace_pid_list *pid_list, loff_t *pos); int trace_pid_show(struct seq_file *m, void *v); void trace_free_pid_list(struct trace_pid_list *pid_list); int trace_pid_write(struct trace_pid_list *filtered_pids, struct trace_pid_list **new_pid_list, const char __user *ubuf, size_t cnt); #ifdef CONFIG_TRACER_MAX_TRACE void update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu, void *cond_data); void update_max_tr_single(struct trace_array *tr, struct task_struct *tsk, int cpu); #endif /* CONFIG_TRACER_MAX_TRACE */ #if (defined(CONFIG_TRACER_MAX_TRACE) || defined(CONFIG_HWLAT_TRACER)) && \ defined(CONFIG_FSNOTIFY) void latency_fsnotify(struct trace_array *tr); #else static inline void latency_fsnotify(struct trace_array *tr) { } #endif #ifdef CONFIG_STACKTRACE void __trace_stack(struct trace_array *tr, unsigned long flags, int skip, int pc); #else static inline void __trace_stack(struct trace_array *tr, unsigned long flags, int skip, int pc) { } #endif /* CONFIG_STACKTRACE */ extern u64 ftrace_now(int cpu); extern void trace_find_cmdline(int pid, char comm[]); extern int trace_find_tgid(int pid); extern void trace_event_follow_fork(struct trace_array *tr, bool enable); #ifdef CONFIG_DYNAMIC_FTRACE extern unsigned long ftrace_update_tot_cnt; extern unsigned long ftrace_number_of_pages; extern unsigned long ftrace_number_of_groups; void ftrace_init_trace_array(struct trace_array *tr); #else static inline void ftrace_init_trace_array(struct trace_array *tr) { } #endif #define DYN_FTRACE_TEST_NAME trace_selftest_dynamic_test_func extern int DYN_FTRACE_TEST_NAME(void); #define DYN_FTRACE_TEST_NAME2 trace_selftest_dynamic_test_func2 extern int DYN_FTRACE_TEST_NAME2(void); extern bool ring_buffer_expanded; extern bool tracing_selftest_disabled; #ifdef CONFIG_FTRACE_STARTUP_TEST extern void __init disable_tracing_selftest(const char *reason); extern int trace_selftest_startup_function(struct tracer *trace, struct trace_array *tr); extern int trace_selftest_startup_function_graph(struct tracer *trace, struct trace_array *tr); extern int trace_selftest_startup_irqsoff(struct tracer *trace, struct trace_array *tr); extern int trace_selftest_startup_preemptoff(struct tracer *trace, struct trace_array *tr); extern int trace_selftest_startup_preemptirqsoff(struct tracer *trace, struct trace_array *tr); extern int trace_selftest_startup_wakeup(struct tracer *trace, struct trace_array *tr); extern int trace_selftest_startup_nop(struct tracer *trace, struct trace_array *tr); extern int trace_selftest_startup_branch(struct tracer *trace, struct trace_array *tr); /* * Tracer data references selftest functions that only occur * on boot up. These can be __init functions. Thus, when selftests * are enabled, then the tracers need to reference __init functions. */ #define __tracer_data __refdata #else static inline void __init disable_tracing_selftest(const char *reason) { } /* Tracers are seldom changed. Optimize when selftests are disabled. */ #define __tracer_data __read_mostly #endif /* CONFIG_FTRACE_STARTUP_TEST */ extern void *head_page(struct trace_array_cpu *data); extern unsigned long long ns2usecs(u64 nsec); extern int trace_vbprintk(unsigned long ip, const char *fmt, va_list args); extern int trace_vprintk(unsigned long ip, const char *fmt, va_list args); extern int trace_array_vprintk(struct trace_array *tr, unsigned long ip, const char *fmt, va_list args); int trace_array_printk_buf(struct trace_buffer *buffer, unsigned long ip, const char *fmt, ...); void trace_printk_seq(struct trace_seq *s); enum print_line_t print_trace_line(struct trace_iterator *iter); extern char trace_find_mark(unsigned long long duration); struct ftrace_hash; struct ftrace_mod_load { struct list_head list; char *func; char *module; int enable; }; enum { FTRACE_HASH_FL_MOD = (1 << 0), }; struct ftrace_hash { unsigned long size_bits; struct hlist_head *buckets; unsigned long count; unsigned long flags; struct rcu_head rcu; }; struct ftrace_func_entry * ftrace_lookup_ip(struct ftrace_hash *hash, unsigned long ip); static __always_inline bool ftrace_hash_empty(struct ftrace_hash *hash) { return !hash || !(hash->count || (hash->flags & FTRACE_HASH_FL_MOD)); } /* Standard output formatting function used for function return traces */ #ifdef CONFIG_FUNCTION_GRAPH_TRACER /* Flag options */ #define TRACE_GRAPH_PRINT_OVERRUN 0x1 #define TRACE_GRAPH_PRINT_CPU 0x2 #define TRACE_GRAPH_PRINT_OVERHEAD 0x4 #define TRACE_GRAPH_PRINT_PROC 0x8 #define TRACE_GRAPH_PRINT_DURATION 0x10 #define TRACE_GRAPH_PRINT_ABS_TIME 0x20 #define TRACE_GRAPH_PRINT_REL_TIME 0x40 #define TRACE_GRAPH_PRINT_IRQS 0x80 #define TRACE_GRAPH_PRINT_TAIL 0x100 #define TRACE_GRAPH_SLEEP_TIME 0x200 #define TRACE_GRAPH_GRAPH_TIME 0x400 #define TRACE_GRAPH_PRINT_FILL_SHIFT 28 #define TRACE_GRAPH_PRINT_FILL_MASK (0x3 << TRACE_GRAPH_PRINT_FILL_SHIFT) extern void ftrace_graph_sleep_time_control(bool enable); #ifdef CONFIG_FUNCTION_PROFILER extern void ftrace_graph_graph_time_control(bool enable); #else static inline void ftrace_graph_graph_time_control(bool enable) { } #endif extern enum print_line_t print_graph_function_flags(struct trace_iterator *iter, u32 flags); extern void print_graph_headers_flags(struct seq_file *s, u32 flags); extern void trace_print_graph_duration(unsigned long long duration, struct trace_seq *s); extern void graph_trace_open(struct trace_iterator *iter); extern void graph_trace_close(struct trace_iterator *iter); extern int __trace_graph_entry(struct trace_array *tr, struct ftrace_graph_ent *trace, unsigned long flags, int pc); extern void __trace_graph_return(struct trace_array *tr, struct ftrace_graph_ret *trace, unsigned long flags, int pc); #ifdef CONFIG_DYNAMIC_FTRACE extern struct ftrace_hash __rcu *ftrace_graph_hash; extern struct ftrace_hash __rcu *ftrace_graph_notrace_hash; static inline int ftrace_graph_addr(struct ftrace_graph_ent *trace) { unsigned long addr = trace->func; int ret = 0; struct ftrace_hash *hash; preempt_disable_notrace(); /* * Have to open code "rcu_dereference_sched()" because the * function graph tracer can be called when RCU is not * "watching". * Protected with schedule_on_each_cpu(ftrace_sync) */ hash = rcu_dereference_protected(ftrace_graph_hash, !preemptible()); if (ftrace_hash_empty(hash)) { ret = 1; goto out; } if (ftrace_lookup_ip(hash, addr)) { /* * This needs to be cleared on the return functions * when the depth is zero. */ trace_recursion_set(TRACE_GRAPH_BIT); trace_recursion_set_depth(trace->depth); /* * If no irqs are to be traced, but a set_graph_function * is set, and called by an interrupt handler, we still * want to trace it. */ if (in_irq()) trace_recursion_set(TRACE_IRQ_BIT); else trace_recursion_clear(TRACE_IRQ_BIT); ret = 1; } out: preempt_enable_notrace(); return ret; } static inline void ftrace_graph_addr_finish(struct ftrace_graph_ret *trace) { if (trace_recursion_test(TRACE_GRAPH_BIT) && trace->depth == trace_recursion_depth()) trace_recursion_clear(TRACE_GRAPH_BIT); } static inline int ftrace_graph_notrace_addr(unsigned long addr) { int ret = 0; struct ftrace_hash *notrace_hash; preempt_disable_notrace(); /* * Have to open code "rcu_dereference_sched()" because the * function graph tracer can be called when RCU is not * "watching". * Protected with schedule_on_each_cpu(ftrace_sync) */ notrace_hash = rcu_dereference_protected(ftrace_graph_notrace_hash, !preemptible()); if (ftrace_lookup_ip(notrace_hash, addr)) ret = 1; preempt_enable_notrace(); return ret; } #else static inline int ftrace_graph_addr(struct ftrace_graph_ent *trace) { return 1; } static inline int ftrace_graph_notrace_addr(unsigned long addr) { return 0; } static inline void ftrace_graph_addr_finish(struct ftrace_graph_ret *trace) { } #endif /* CONFIG_DYNAMIC_FTRACE */ extern unsigned int fgraph_max_depth; static inline bool ftrace_graph_ignore_func(struct ftrace_graph_ent *trace) { /* trace it when it is-nested-in or is a function enabled. */ return !(trace_recursion_test(TRACE_GRAPH_BIT) || ftrace_graph_addr(trace)) || (trace->depth < 0) || (fgraph_max_depth && trace->depth >= fgraph_max_depth); } #else /* CONFIG_FUNCTION_GRAPH_TRACER */ static inline enum print_line_t print_graph_function_flags(struct trace_iterator *iter, u32 flags) { return TRACE_TYPE_UNHANDLED; } #endif /* CONFIG_FUNCTION_GRAPH_TRACER */ extern struct list_head ftrace_pids; #ifdef CONFIG_FUNCTION_TRACER #define FTRACE_PID_IGNORE -1 #define FTRACE_PID_TRACE -2 struct ftrace_func_command { struct list_head list; char *name; int (*func)(struct trace_array *tr, struct ftrace_hash *hash, char *func, char *cmd, char *params, int enable); }; extern bool ftrace_filter_param __initdata; static inline int ftrace_trace_task(struct trace_array *tr) { return this_cpu_read(tr->array_buffer.data->ftrace_ignore_pid) != FTRACE_PID_IGNORE; } extern int ftrace_is_dead(void); int ftrace_create_function_files(struct trace_array *tr, struct dentry *parent); void ftrace_destroy_function_files(struct trace_array *tr); int ftrace_allocate_ftrace_ops(struct trace_array *tr); void ftrace_free_ftrace_ops(struct trace_array *tr); void ftrace_init_global_array_ops(struct trace_array *tr); void ftrace_init_array_ops(struct trace_array *tr, ftrace_func_t func); void ftrace_reset_array_ops(struct trace_array *tr); void ftrace_init_tracefs(struct trace_array *tr, struct dentry *d_tracer); void ftrace_init_tracefs_toplevel(struct trace_array *tr, struct dentry *d_tracer); void ftrace_clear_pids(struct trace_array *tr); int init_function_trace(void); void ftrace_pid_follow_fork(struct trace_array *tr, bool enable); #else static inline int ftrace_trace_task(struct trace_array *tr) { return 1; } static inline int ftrace_is_dead(void) { return 0; } static inline int ftrace_create_function_files(struct trace_array *tr, struct dentry *parent) { return 0; } static inline int ftrace_allocate_ftrace_ops(struct trace_array *tr) { return 0; } static inline void ftrace_free_ftrace_ops(struct trace_array *tr) { } static inline void ftrace_destroy_function_files(struct trace_array *tr) { } static inline __init void ftrace_init_global_array_ops(struct trace_array *tr) { } static inline void ftrace_reset_array_ops(struct trace_array *tr) { } static inline void ftrace_init_tracefs(struct trace_array *tr, struct dentry *d) { } static inline void ftrace_init_tracefs_toplevel(struct trace_array *tr, struct dentry *d) { } static inline void ftrace_clear_pids(struct trace_array *tr) { } static inline int init_function_trace(void) { return 0; } static inline void ftrace_pid_follow_fork(struct trace_array *tr, bool enable) { } /* ftace_func_t type is not defined, use macro instead of static inline */ #define ftrace_init_array_ops(tr, func) do { } while (0) #endif /* CONFIG_FUNCTION_TRACER */ #if defined(CONFIG_FUNCTION_TRACER) && defined(CONFIG_DYNAMIC_FTRACE) struct ftrace_probe_ops { void (*func)(unsigned long ip, unsigned long parent_ip, struct trace_array *tr, struct ftrace_probe_ops *ops, void *data); int (*init)(struct ftrace_probe_ops *ops, struct trace_array *tr, unsigned long ip, void *init_data, void **data); void (*free)(struct ftrace_probe_ops *ops, struct trace_array *tr, unsigned long ip, void *data); int (*print)(struct seq_file *m, unsigned long ip, struct ftrace_probe_ops *ops, void *data); }; struct ftrace_func_mapper; typedef int (*ftrace_mapper_func)(void *data); struct ftrace_func_mapper *allocate_ftrace_func_mapper(void); void **ftrace_func_mapper_find_ip(struct ftrace_func_mapper *mapper, unsigned long ip); int ftrace_func_mapper_add_ip(struct ftrace_func_mapper *mapper, unsigned long ip, void *data); void *ftrace_func_mapper_remove_ip(struct ftrace_func_mapper *mapper, unsigned long ip); void free_ftrace_func_mapper(struct ftrace_func_mapper *mapper, ftrace_mapper_func free_func); extern int register_ftrace_function_probe(char *glob, struct trace_array *tr, struct ftrace_probe_ops *ops, void *data); extern int unregister_ftrace_function_probe_func(char *glob, struct trace_array *tr, struct ftrace_probe_ops *ops); extern void clear_ftrace_function_probes(struct trace_array *tr); int register_ftrace_command(struct ftrace_func_command *cmd); int unregister_ftrace_command(struct ftrace_func_command *cmd); void ftrace_create_filter_files(struct ftrace_ops *ops, struct dentry *parent); void ftrace_destroy_filter_files(struct ftrace_ops *ops); extern int ftrace_set_filter(struct ftrace_ops *ops, unsigned char *buf, int len, int reset); extern int ftrace_set_notrace(struct ftrace_ops *ops, unsigned char *buf, int len, int reset); #else struct ftrace_func_command; static inline __init int register_ftrace_command(struct ftrace_func_command *cmd) { return -EINVAL; } static inline __init int unregister_ftrace_command(char *cmd_name) { return -EINVAL; } static inline void clear_ftrace_function_probes(struct trace_array *tr) { } /* * The ops parameter passed in is usually undefined. * This must be a macro. */ #define ftrace_create_filter_files(ops, parent) do { } while (0) #define ftrace_destroy_filter_files(ops) do { } while (0) #endif /* CONFIG_FUNCTION_TRACER && CONFIG_DYNAMIC_FTRACE */ bool ftrace_event_is_function(struct trace_event_call *call); /* * struct trace_parser - servers for reading the user input separated by spaces * @cont: set if the input is not complete - no final space char was found * @buffer: holds the parsed user input * @idx: user input length * @size: buffer size */ struct trace_parser { bool cont; char *buffer; unsigned idx; unsigned size; }; static inline bool trace_parser_loaded(struct trace_parser *parser) { return (parser->idx != 0); } static inline bool trace_parser_cont(struct trace_parser *parser) { return parser->cont; } static inline void trace_parser_clear(struct trace_parser *parser) { parser->cont = false; parser->idx = 0; } extern int trace_parser_get_init(struct trace_parser *parser, int size); extern void trace_parser_put(struct trace_parser *parser); extern int trace_get_user(struct trace_parser *parser, const char __user *ubuf, size_t cnt, loff_t *ppos); /* * Only create function graph options if function graph is configured. */ #ifdef CONFIG_FUNCTION_GRAPH_TRACER # define FGRAPH_FLAGS \ C(DISPLAY_GRAPH, "display-graph"), #else # define FGRAPH_FLAGS #endif #ifdef CONFIG_BRANCH_TRACER # define BRANCH_FLAGS \ C(BRANCH, "branch"), #else # define BRANCH_FLAGS #endif #ifdef CONFIG_FUNCTION_TRACER # define FUNCTION_FLAGS \ C(FUNCTION, "function-trace"), \ C(FUNC_FORK, "function-fork"), # define FUNCTION_DEFAULT_FLAGS TRACE_ITER_FUNCTION #else # define FUNCTION_FLAGS # define FUNCTION_DEFAULT_FLAGS 0UL # define TRACE_ITER_FUNC_FORK 0UL #endif #ifdef CONFIG_STACKTRACE # define STACK_FLAGS \ C(STACKTRACE, "stacktrace"), #else # define STACK_FLAGS #endif /* * trace_iterator_flags is an enumeration that defines bit * positions into trace_flags that controls the output. * * NOTE: These bits must match the trace_options array in * trace.c (this macro guarantees it). */ #define TRACE_FLAGS \ C(PRINT_PARENT, "print-parent"), \ C(SYM_OFFSET, "sym-offset"), \ C(SYM_ADDR, "sym-addr"), \ C(VERBOSE, "verbose"), \ C(RAW, "raw"), \ C(HEX, "hex"), \ C(BIN, "bin"), \ C(BLOCK, "block"), \ C(PRINTK, "trace_printk"), \ C(ANNOTATE, "annotate"), \ C(USERSTACKTRACE, "userstacktrace"), \ C(SYM_USEROBJ, "sym-userobj"), \ C(PRINTK_MSGONLY, "printk-msg-only"), \ C(CONTEXT_INFO, "context-info"), /* Print pid/cpu/time */ \ C(LATENCY_FMT, "latency-format"), \ C(RECORD_CMD, "record-cmd"), \ C(RECORD_TGID, "record-tgid"), \ C(OVERWRITE, "overwrite"), \ C(STOP_ON_FREE, "disable_on_free"), \ C(IRQ_INFO, "irq-info"), \ C(MARKERS, "markers"), \ C(EVENT_FORK, "event-fork"), \ C(PAUSE_ON_TRACE, "pause-on-trace"), \ FUNCTION_FLAGS \ FGRAPH_FLAGS \ STACK_FLAGS \ BRANCH_FLAGS /* * By defining C, we can make TRACE_FLAGS a list of bit names * that will define the bits for the flag masks. */ #undef C #define C(a, b) TRACE_ITER_##a##_BIT enum trace_iterator_bits { TRACE_FLAGS /* Make sure we don't go more than we have bits for */ TRACE_ITER_LAST_BIT }; /* * By redefining C, we can make TRACE_FLAGS a list of masks that * use the bits as defined above. */ #undef C #define C(a, b) TRACE_ITER_##a = (1 << TRACE_ITER_##a##_BIT) enum trace_iterator_flags { TRACE_FLAGS }; /* * TRACE_ITER_SYM_MASK masks the options in trace_flags that * control the output of kernel symbols. */ #define TRACE_ITER_SYM_MASK \ (TRACE_ITER_PRINT_PARENT|TRACE_ITER_SYM_OFFSET|TRACE_ITER_SYM_ADDR) extern struct tracer nop_trace; #ifdef CONFIG_BRANCH_TRACER extern int enable_branch_tracing(struct trace_array *tr); extern void disable_branch_tracing(void); static inline int trace_branch_enable(struct trace_array *tr) { if (tr->trace_flags & TRACE_ITER_BRANCH) return enable_branch_tracing(tr); return 0; } static inline void trace_branch_disable(void) { /* due to races, always disable */ disable_branch_tracing(); } #else static inline int trace_branch_enable(struct trace_array *tr) { return 0; } static inline void trace_branch_disable(void) { } #endif /* CONFIG_BRANCH_TRACER */ /* set ring buffers to default size if not already done so */ int tracing_update_buffers(void); struct ftrace_event_field { struct list_head link; const char *name; const char *type; int filter_type; int offset; int size; int is_signed; }; struct prog_entry; struct event_filter { struct prog_entry __rcu *prog; char *filter_string; }; struct event_subsystem { struct list_head list; const char *name; struct event_filter *filter; int ref_count; }; struct trace_subsystem_dir { struct list_head list; struct event_subsystem *subsystem; struct trace_array *tr; struct dentry *entry; int ref_count; int nr_events; }; extern int call_filter_check_discard(struct trace_event_call *call, void *rec, struct trace_buffer *buffer, struct ring_buffer_event *event); void trace_buffer_unlock_commit_regs(struct trace_array *tr, struct trace_buffer *buffer, struct ring_buffer_event *event, unsigned long flags, int pc, struct pt_regs *regs); static inline void trace_buffer_unlock_commit(struct trace_array *tr, struct trace_buffer *buffer, struct ring_buffer_event *event, unsigned long flags, int pc) { trace_buffer_unlock_commit_regs(tr, buffer, event, flags, pc, NULL); } DECLARE_PER_CPU(struct ring_buffer_event *, trace_buffered_event); DECLARE_PER_CPU(int, trace_buffered_event_cnt); void trace_buffered_event_disable(void); void trace_buffered_event_enable(void); static inline void __trace_event_discard_commit(struct trace_buffer *buffer, struct ring_buffer_event *event) { if (this_cpu_read(trace_buffered_event) == event) { /* Simply release the temp buffer */ this_cpu_dec(trace_buffered_event_cnt); return; } ring_buffer_discard_commit(buffer, event); } /* * Helper function for event_trigger_unlock_commit{_regs}(). * If there are event triggers attached to this event that requires * filtering against its fields, then they will be called as the * entry already holds the field information of the current event. * * It also checks if the event should be discarded or not. * It is to be discarded if the event is soft disabled and the * event was only recorded to process triggers, or if the event * filter is active and this event did not match the filters. * * Returns true if the event is discarded, false otherwise. */ static inline bool __event_trigger_test_discard(struct trace_event_file *file, struct trace_buffer *buffer, struct ring_buffer_event *event, void *entry, enum event_trigger_type *tt) { unsigned long eflags = file->flags; if (eflags & EVENT_FILE_FL_TRIGGER_COND) *tt = event_triggers_call(file, entry, event); if (likely(!(file->flags & (EVENT_FILE_FL_SOFT_DISABLED | EVENT_FILE_FL_FILTERED | EVENT_FILE_FL_PID_FILTER)))) return false; if (file->flags & EVENT_FILE_FL_SOFT_DISABLED) goto discard; if (file->flags & EVENT_FILE_FL_FILTERED && !filter_match_preds(file->filter, entry)) goto discard; if ((file->flags & EVENT_FILE_FL_PID_FILTER) && trace_event_ignore_this_pid(file)) goto discard; return false; discard: __trace_event_discard_commit(buffer, event); return true; } /** * event_trigger_unlock_commit - handle triggers and finish event commit * @file: The file pointer assoctiated to the event * @buffer: The ring buffer that the event is being written to * @event: The event meta data in the ring buffer * @entry: The event itself * @irq_flags: The state of the interrupts at the start of the event * @pc: The state of the preempt count at the start of the event. * * This is a helper function to handle triggers that require data * from the event itself. It also tests the event against filters and * if the event is soft disabled and should be discarded. */ static inline void event_trigger_unlock_commit(struct trace_event_file *file, struct trace_buffer *buffer, struct ring_buffer_event *event, void *entry, unsigned long irq_flags, int pc) { enum event_trigger_type tt = ETT_NONE; if (!__event_trigger_test_discard(file, buffer, event, entry, &tt)) trace_buffer_unlock_commit(file->tr, buffer, event, irq_flags, pc); if (tt) event_triggers_post_call(file, tt); } /** * event_trigger_unlock_commit_regs - handle triggers and finish event commit * @file: The file pointer assoctiated to the event * @buffer: The ring buffer that the event is being written to * @event: The event meta data in the ring buffer * @entry: The event itself * @irq_flags: The state of the interrupts at the start of the event * @pc: The state of the preempt count at the start of the event. * * This is a helper function to handle triggers that require data * from the event itself. It also tests the event against filters and * if the event is soft disabled and should be discarded. * * Same as event_trigger_unlock_commit() but calls * trace_buffer_unlock_commit_regs() instead of trace_buffer_unlock_commit(). */ static inline void event_trigger_unlock_commit_regs(struct trace_event_file *file, struct trace_buffer *buffer, struct ring_buffer_event *event, void *entry, unsigned long irq_flags, int pc, struct pt_regs *regs) { enum event_trigger_type tt = ETT_NONE; if (!__event_trigger_test_discard(file, buffer, event, entry, &tt)) trace_buffer_unlock_commit_regs(file->tr, buffer, event, irq_flags, pc, regs); if (tt) event_triggers_post_call(file, tt); } #define FILTER_PRED_INVALID ((unsigned short)-1) #define FILTER_PRED_IS_RIGHT (1 << 15) #define FILTER_PRED_FOLD (1 << 15) /* * The max preds is the size of unsigned short with * two flags at the MSBs. One bit is used for both the IS_RIGHT * and FOLD flags. The other is reserved. * * 2^14 preds is way more than enough. */ #define MAX_FILTER_PRED 16384 struct filter_pred; struct regex; typedef int (*filter_pred_fn_t) (struct filter_pred *pred, void *event); typedef int (*regex_match_func)(char *str, struct regex *r, int len); enum regex_type { MATCH_FULL = 0, MATCH_FRONT_ONLY, MATCH_MIDDLE_ONLY, MATCH_END_ONLY, MATCH_GLOB, MATCH_INDEX, }; struct regex { char pattern[MAX_FILTER_STR_VAL]; int len; int field_len; regex_match_func match; }; struct filter_pred { filter_pred_fn_t fn; u64 val; struct regex regex; unsigned short *ops; struct ftrace_event_field *field; int offset; int not; int op; }; static inline bool is_string_field(struct ftrace_event_field *field) { return field->filter_type == FILTER_DYN_STRING || field->filter_type == FILTER_STATIC_STRING || field->filter_type == FILTER_PTR_STRING || field->filter_type == FILTER_COMM; } static inline bool is_function_field(struct ftrace_event_field *field) { return field->filter_type == FILTER_TRACE_FN; } extern enum regex_type filter_parse_regex(char *buff, int len, char **search, int *not); extern void print_event_filter(struct trace_event_file *file, struct trace_seq *s); extern int apply_event_filter(struct trace_event_file *file, char *filter_string); extern int apply_subsystem_event_filter(struct trace_subsystem_dir *dir, char *filter_string); extern void print_subsystem_event_filter(struct event_subsystem *system, struct trace_seq *s); extern int filter_assign_type(const char *type); extern int create_event_filter(struct trace_array *tr, struct trace_event_call *call, char *filter_str, bool set_str, struct event_filter **filterp); extern void free_event_filter(struct event_filter *filter); struct ftrace_event_field * trace_find_event_field(struct trace_event_call *call, char *name); extern void trace_event_enable_cmd_record(bool enable); extern void trace_event_enable_tgid_record(bool enable); extern int event_trace_init(void); extern int event_trace_add_tracer(struct dentry *parent, struct trace_array *tr); extern int event_trace_del_tracer(struct trace_array *tr); extern void __trace_early_add_events(struct trace_array *tr); extern struct trace_event_file *__find_event_file(struct trace_array *tr, const char *system, const char *event); extern struct trace_event_file *find_event_file(struct trace_array *tr, const char *system, const char *event); static inline void *event_file_data(struct file *filp) { return READ_ONCE(file_inode(filp)->i_private); } extern struct mutex event_mutex; extern struct list_head ftrace_events; extern const struct file_operations event_trigger_fops; extern const struct file_operations event_hist_fops; extern const struct file_operations event_hist_debug_fops; extern const struct file_operations event_inject_fops; #ifdef CONFIG_HIST_TRIGGERS extern int register_trigger_hist_cmd(void); extern int register_trigger_hist_enable_disable_cmds(void); #else static inline int register_trigger_hist_cmd(void) { return 0; } static inline int register_trigger_hist_enable_disable_cmds(void) { return 0; } #endif extern int register_trigger_cmds(void); extern void clear_event_triggers(struct trace_array *tr); struct event_trigger_data { unsigned long count; int ref; struct event_trigger_ops *ops; struct event_command *cmd_ops; struct event_filter __rcu *filter; char *filter_str; void *private_data; bool paused; bool paused_tmp; struct list_head list; char *name; struct list_head named_list; struct event_trigger_data *named_data; }; /* Avoid typos */ #define ENABLE_EVENT_STR "enable_event" #define DISABLE_EVENT_STR "disable_event" #define ENABLE_HIST_STR "enable_hist" #define DISABLE_HIST_STR "disable_hist" struct enable_trigger_data { struct trace_event_file *file; bool enable; bool hist; }; extern int event_enable_trigger_print(struct seq_file *m, struct event_trigger_ops *ops, struct event_trigger_data *data); extern void event_enable_trigger_free(struct event_trigger_ops *ops, struct event_trigger_data *data); extern int event_enable_trigger_func(struct event_command *cmd_ops, struct trace_event_file *file, char *glob, char *cmd, char *param); extern int event_enable_register_trigger(char *glob, struct event_trigger_ops *ops, struct event_trigger_data *data, struct trace_event_file *file); extern void event_enable_unregister_trigger(char *glob, struct event_trigger_ops *ops, struct event_trigger_data *test, struct trace_event_file *file); extern void trigger_data_free(struct event_trigger_data *data); extern int event_trigger_init(struct event_trigger_ops *ops, struct event_trigger_data *data); extern int trace_event_trigger_enable_disable(struct trace_event_file *file, int trigger_enable); extern void update_cond_flag(struct trace_event_file *file); extern int set_trigger_filter(char *filter_str, struct event_trigger_data *trigger_data, struct trace_event_file *file); extern struct event_trigger_data *find_named_trigger(const char *name); extern bool is_named_trigger(struct event_trigger_data *test); extern int save_named_trigger(const char *name, struct event_trigger_data *data); extern void del_named_trigger(struct event_trigger_data *data); extern void pause_named_trigger(struct event_trigger_data *data); extern void unpause_named_trigger(struct event_trigger_data *data); extern void set_named_trigger_data(struct event_trigger_data *data, struct event_trigger_data *named_data); extern struct event_trigger_data * get_named_trigger_data(struct event_trigger_data *data); extern int register_event_command(struct event_command *cmd); extern int unregister_event_command(struct event_command *cmd); extern int register_trigger_hist_enable_disable_cmds(void); /** * struct event_trigger_ops - callbacks for trace event triggers * * The methods in this structure provide per-event trigger hooks for * various trigger operations. * * All the methods below, except for @init() and @free(), must be * implemented. * * @func: The trigger 'probe' function called when the triggering * event occurs. The data passed into this callback is the data * that was supplied to the event_command @reg() function that * registered the trigger (see struct event_command) along with * the trace record, rec. * * @init: An optional initialization function called for the trigger * when the trigger is registered (via the event_command reg() * function). This can be used to perform per-trigger * initialization such as incrementing a per-trigger reference * count, for instance. This is usually implemented by the * generic utility function @event_trigger_init() (see * trace_event_triggers.c). * * @free: An optional de-initialization function called for the * trigger when the trigger is unregistered (via the * event_command @reg() function). This can be used to perform * per-trigger de-initialization such as decrementing a * per-trigger reference count and freeing corresponding trigger * data, for instance. This is usually implemented by the * generic utility function @event_trigger_free() (see * trace_event_triggers.c). * * @print: The callback function invoked to have the trigger print * itself. This is usually implemented by a wrapper function * that calls the generic utility function @event_trigger_print() * (see trace_event_triggers.c). */ struct event_trigger_ops { void (*func)(struct event_trigger_data *data, void *rec, struct ring_buffer_event *rbe); int (*init)(struct event_trigger_ops *ops, struct event_trigger_data *data); void (*free)(struct event_trigger_ops *ops, struct event_trigger_data *data); int (*print)(struct seq_file *m, struct event_trigger_ops *ops, struct event_trigger_data *data); }; /** * struct event_command - callbacks and data members for event commands * * Event commands are invoked by users by writing the command name * into the 'trigger' file associated with a trace event. The * parameters associated with a specific invocation of an event * command are used to create an event trigger instance, which is * added to the list of trigger instances associated with that trace * event. When the event is hit, the set of triggers associated with * that event is invoked. * * The data members in this structure provide per-event command data * for various event commands. * * All the data members below, except for @post_trigger, must be set * for each event command. * * @name: The unique name that identifies the event command. This is * the name used when setting triggers via trigger files. * * @trigger_type: A unique id that identifies the event command * 'type'. This value has two purposes, the first to ensure that * only one trigger of the same type can be set at a given time * for a particular event e.g. it doesn't make sense to have both * a traceon and traceoff trigger attached to a single event at * the same time, so traceon and traceoff have the same type * though they have different names. The @trigger_type value is * also used as a bit value for deferring the actual trigger * action until after the current event is finished. Some * commands need to do this if they themselves log to the trace * buffer (see the @post_trigger() member below). @trigger_type * values are defined by adding new values to the trigger_type * enum in include/linux/trace_events.h. * * @flags: See the enum event_command_flags below. * * All the methods below, except for @set_filter() and @unreg_all(), * must be implemented. * * @func: The callback function responsible for parsing and * registering the trigger written to the 'trigger' file by the * user. It allocates the trigger instance and registers it with * the appropriate trace event. It makes use of the other * event_command callback functions to orchestrate this, and is * usually implemented by the generic utility function * @event_trigger_callback() (see trace_event_triggers.c). * * @reg: Adds the trigger to the list of triggers associated with the * event, and enables the event trigger itself, after * initializing it (via the event_trigger_ops @init() function). * This is also where commands can use the @trigger_type value to * make the decision as to whether or not multiple instances of * the trigger should be allowed. This is usually implemented by * the generic utility function @register_trigger() (see * trace_event_triggers.c). * * @unreg: Removes the trigger from the list of triggers associated * with the event, and disables the event trigger itself, after * initializing it (via the event_trigger_ops @free() function). * This is usually implemented by the generic utility function * @unregister_trigger() (see trace_event_triggers.c). * * @unreg_all: An optional function called to remove all the triggers * from the list of triggers associated with the event. Called * when a trigger file is opened in truncate mode. * * @set_filter: An optional function called to parse and set a filter * for the trigger. If no @set_filter() method is set for the * event command, filters set by the user for the command will be * ignored. This is usually implemented by the generic utility * function @set_trigger_filter() (see trace_event_triggers.c). * * @get_trigger_ops: The callback function invoked to retrieve the * event_trigger_ops implementation associated with the command. */ struct event_command { struct list_head list; char *name; enum event_trigger_type trigger_type; int flags; int (*func)(struct event_command *cmd_ops, struct trace_event_file *file, char *glob, char *cmd, char *params); int (*reg)(char *glob, struct event_trigger_ops *ops, struct event_trigger_data *data, struct trace_event_file *file); void (*unreg)(char *glob, struct event_trigger_ops *ops, struct event_trigger_data *data, struct trace_event_file *file); void (*unreg_all)(struct trace_event_file *file); int (*set_filter)(char *filter_str, struct event_trigger_data *data, struct trace_event_file *file); struct event_trigger_ops *(*get_trigger_ops)(char *cmd, char *param); }; /** * enum event_command_flags - flags for struct event_command * * @POST_TRIGGER: A flag that says whether or not this command needs * to have its action delayed until after the current event has * been closed. Some triggers need to avoid being invoked while * an event is currently in the process of being logged, since * the trigger may itself log data into the trace buffer. Thus * we make sure the current event is committed before invoking * those triggers. To do that, the trigger invocation is split * in two - the first part checks the filter using the current * trace record; if a command has the @post_trigger flag set, it * sets a bit for itself in the return value, otherwise it * directly invokes the trigger. Once all commands have been * either invoked or set their return flag, the current record is * either committed or discarded. At that point, if any commands * have deferred their triggers, those commands are finally * invoked following the close of the current event. In other * words, if the event_trigger_ops @func() probe implementation * itself logs to the trace buffer, this flag should be set, * otherwise it can be left unspecified. * * @NEEDS_REC: A flag that says whether or not this command needs * access to the trace record in order to perform its function, * regardless of whether or not it has a filter associated with * it (filters make a trigger require access to the trace record * but are not always present). */ enum event_command_flags { EVENT_CMD_FL_POST_TRIGGER = 1, EVENT_CMD_FL_NEEDS_REC = 2, }; static inline bool event_command_post_trigger(struct event_command *cmd_ops) { return cmd_ops->flags & EVENT_CMD_FL_POST_TRIGGER; } static inline bool event_command_needs_rec(struct event_command *cmd_ops) { return cmd_ops->flags & EVENT_CMD_FL_NEEDS_REC; } extern int trace_event_enable_disable(struct trace_event_file *file, int enable, int soft_disable); extern int tracing_alloc_snapshot(void); extern void tracing_snapshot_cond(struct trace_array *tr, void *cond_data); extern int tracing_snapshot_cond_enable(struct trace_array *tr, void *cond_data, cond_update_fn_t update); extern int tracing_snapshot_cond_disable(struct trace_array *tr); extern void *tracing_cond_snapshot_data(struct trace_array *tr); extern const char *__start___trace_bprintk_fmt[]; extern const char *__stop___trace_bprintk_fmt[]; extern const char *__start___tracepoint_str[]; extern const char *__stop___tracepoint_str[]; void trace_printk_control(bool enabled); void trace_printk_start_comm(void); int trace_keep_overwrite(struct tracer *tracer, u32 mask, int set); int set_tracer_flag(struct trace_array *tr, unsigned int mask, int enabled); /* Used from boot time tracer */ extern int trace_set_options(struct trace_array *tr, char *option); extern int tracing_set_tracer(struct trace_array *tr, const char *buf); extern ssize_t tracing_resize_ring_buffer(struct trace_array *tr, unsigned long size, int cpu_id); extern int tracing_set_cpumask(struct trace_array *tr, cpumask_var_t tracing_cpumask_new); #define MAX_EVENT_NAME_LEN 64 extern int trace_run_command(const char *buf, int (*createfn)(int, char**)); extern ssize_t trace_parse_run_command(struct file *file, const char __user *buffer, size_t count, loff_t *ppos, int (*createfn)(int, char**)); extern unsigned int err_pos(char *cmd, const char *str); extern void tracing_log_err(struct trace_array *tr, const char *loc, const char *cmd, const char **errs, u8 type, u8 pos); /* * Normal trace_printk() and friends allocates special buffers * to do the manipulation, as well as saves the print formats * into sections to display. But the trace infrastructure wants * to use these without the added overhead at the price of being * a bit slower (used mainly for warnings, where we don't care * about performance). The internal_trace_puts() is for such * a purpose. */ #define internal_trace_puts(str) __trace_puts(_THIS_IP_, str, strlen(str)) #undef FTRACE_ENTRY #define FTRACE_ENTRY(call, struct_name, id, tstruct, print) \ extern struct trace_event_call \ __aligned(4) event_##call; #undef FTRACE_ENTRY_DUP #define FTRACE_ENTRY_DUP(call, struct_name, id, tstruct, print) \ FTRACE_ENTRY(call, struct_name, id, PARAMS(tstruct), PARAMS(print)) #undef FTRACE_ENTRY_PACKED #define FTRACE_ENTRY_PACKED(call, struct_name, id, tstruct, print) \ FTRACE_ENTRY(call, struct_name, id, PARAMS(tstruct), PARAMS(print)) #include "trace_entries.h" #if defined(CONFIG_PERF_EVENTS) && defined(CONFIG_FUNCTION_TRACER) int perf_ftrace_event_register(struct trace_event_call *call, enum trace_reg type, void *data); #else #define perf_ftrace_event_register NULL #endif #ifdef CONFIG_FTRACE_SYSCALLS void init_ftrace_syscalls(void); const char *get_syscall_name(int syscall); #else static inline void init_ftrace_syscalls(void) { } static inline const char *get_syscall_name(int syscall) { return NULL; } #endif #ifdef CONFIG_EVENT_TRACING void trace_event_init(void); void trace_event_eval_update(struct trace_eval_map **map, int len); /* Used from boot time tracer */ extern int ftrace_set_clr_event(struct trace_array *tr, char *buf, int set); extern int trigger_process_regex(struct trace_event_file *file, char *buff); #else static inline void __init trace_event_init(void) { } static inline void trace_event_eval_update(struct trace_eval_map **map, int len) { } #endif #ifdef CONFIG_TRACER_SNAPSHOT void tracing_snapshot_instance(struct trace_array *tr); int tracing_alloc_snapshot_instance(struct trace_array *tr); #else static inline void tracing_snapshot_instance(struct trace_array *tr) { } static inline int tracing_alloc_snapshot_instance(struct trace_array *tr) { return 0; } #endif #ifdef CONFIG_PREEMPT_TRACER void tracer_preempt_on(unsigned long a0, unsigned long a1); void tracer_preempt_off(unsigned long a0, unsigned long a1); #else static inline void tracer_preempt_on(unsigned long a0, unsigned long a1) { } static inline void tracer_preempt_off(unsigned long a0, unsigned long a1) { } #endif #ifdef CONFIG_IRQSOFF_TRACER void tracer_hardirqs_on(unsigned long a0, unsigned long a1); void tracer_hardirqs_off(unsigned long a0, unsigned long a1); #else static inline void tracer_hardirqs_on(unsigned long a0, unsigned long a1) { } static inline void tracer_hardirqs_off(unsigned long a0, unsigned long a1) { } #endif extern struct trace_iterator *tracepoint_print_iter; /* * Reset the state of the trace_iterator so that it can read consumed data. * Normally, the trace_iterator is used for reading the data when it is not * consumed, and must retain state. */ static __always_inline void trace_iterator_reset(struct trace_iterator *iter) { const size_t offset = offsetof(struct trace_iterator, seq); /* * Keep gcc from complaining about overwriting more than just one * member in the structure. */ memset((char *)iter + offset, 0, sizeof(struct trace_iterator) - offset); iter->pos = -1; } /* Check the name is good for event/group/fields */ static inline bool is_good_name(const char *name) { if (!isalpha(*name) && *name != '_') return false; while (*++name != '\0') { if (!isalpha(*name) && !isdigit(*name) && *name != '_') return false; } return true; } #endif /* _LINUX_KERNEL_TRACE_H */
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 /* SPDX-License-Identifier: GPL-2.0 */ #undef TRACE_SYSTEM #define TRACE_SYSTEM signal #if !defined(_TRACE_SIGNAL_H) || defined(TRACE_HEADER_MULTI_READ) #define _TRACE_SIGNAL_H #include <linux/signal.h> #include <linux/sched.h> #include <linux/tracepoint.h> #define TP_STORE_SIGINFO(__entry, info) \ do { \ if (info == SEND_SIG_NOINFO) { \ __entry->errno = 0; \ __entry->code = SI_USER; \ } else if (info == SEND_SIG_PRIV) { \ __entry->errno = 0; \ __entry->code = SI_KERNEL; \ } else { \ __entry->errno = info->si_errno; \ __entry->code = info->si_code; \ } \ } while (0) #ifndef TRACE_HEADER_MULTI_READ enum { TRACE_SIGNAL_DELIVERED, TRACE_SIGNAL_IGNORED, TRACE_SIGNAL_ALREADY_PENDING, TRACE_SIGNAL_OVERFLOW_FAIL, TRACE_SIGNAL_LOSE_INFO, }; #endif /** * signal_generate - called when a signal is generated * @sig: signal number * @info: pointer to struct siginfo * @task: pointer to struct task_struct * @group: shared or private * @result: TRACE_SIGNAL_* * * Current process sends a 'sig' signal to 'task' process with * 'info' siginfo. If 'info' is SEND_SIG_NOINFO or SEND_SIG_PRIV, * 'info' is not a pointer and you can't access its field. Instead, * SEND_SIG_NOINFO means that si_code is SI_USER, and SEND_SIG_PRIV * means that si_code is SI_KERNEL. */ TRACE_EVENT(signal_generate, TP_PROTO(int sig, struct kernel_siginfo *info, struct task_struct *task, int group, int result), TP_ARGS(sig, info, task, group, result), TP_STRUCT__entry( __field( int, sig ) __field( int, errno ) __field( int, code ) __array( char, comm, TASK_COMM_LEN ) __field( pid_t, pid ) __field( int, group ) __field( int, result ) ), TP_fast_assign( __entry->sig = sig; TP_STORE_SIGINFO(__entry, info); memcpy(__entry->comm, task->comm, TASK_COMM_LEN); __entry->pid = task->pid; __entry->group = group; __entry->result = result; ), TP_printk("sig=%d errno=%d code=%d comm=%s pid=%d grp=%d res=%d", __entry->sig, __entry->errno, __entry->code, __entry->comm, __entry->pid, __entry->group, __entry->result) ); /** * signal_deliver - called when a signal is delivered * @sig: signal number * @info: pointer to struct siginfo * @ka: pointer to struct k_sigaction * * A 'sig' signal is delivered to current process with 'info' siginfo, * and it will be handled by 'ka'. ka->sa.sa_handler can be SIG_IGN or * SIG_DFL. * Note that some signals reported by signal_generate tracepoint can be * lost, ignored or modified (by debugger) before hitting this tracepoint. * This means, this can show which signals are actually delivered, but * matching generated signals and delivered signals may not be correct. */ TRACE_EVENT(signal_deliver, TP_PROTO(int sig, struct kernel_siginfo *info, struct k_sigaction *ka), TP_ARGS(sig, info, ka), TP_STRUCT__entry( __field( int, sig ) __field( int, errno ) __field( int, code ) __field( unsigned long, sa_handler ) __field( unsigned long, sa_flags ) ), TP_fast_assign( __entry->sig = sig; TP_STORE_SIGINFO(__entry, info); __entry->sa_handler = (unsigned long)ka->sa.sa_handler; __entry->sa_flags = ka->sa.sa_flags; ), TP_printk("sig=%d errno=%d code=%d sa_handler=%lx sa_flags=%lx", __entry->sig, __entry->errno, __entry->code, __entry->sa_handler, __entry->sa_flags) ); #endif /* _TRACE_SIGNAL_H */ /* This part must be outside protection */ #include <trace/define_trace.h>
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 /* SPDX-License-Identifier: GPL-2.0 */ #ifndef _LINUX_DAX_H #define _LINUX_DAX_H #include <linux/fs.h> #include <linux/mm.h> #include <linux/radix-tree.h> /* Flag for synchronous flush */ #define DAXDEV_F_SYNC (1UL << 0) typedef unsigned long dax_entry_t; struct iomap_ops; struct iomap; struct dax_device; struct dax_operations { /* * direct_access: translate a device-relative * logical-page-offset into an absolute physical pfn. Return the * number of pages available for DAX at that pfn. */ long (*direct_access)(struct dax_device *, pgoff_t, long, void **, pfn_t *); /* * Validate whether this device is usable as an fsdax backing * device. */ bool (*dax_supported)(struct dax_device *, struct block_device *, int, sector_t, sector_t); /* copy_from_iter: required operation for fs-dax direct-i/o */ size_t (*copy_from_iter)(struct dax_device *, pgoff_t, void *, size_t, struct iov_iter *); /* copy_to_iter: required operation for fs-dax direct-i/o */ size_t (*copy_to_iter)(struct dax_device *, pgoff_t, void *, size_t, struct iov_iter *); /* zero_page_range: required operation. Zero page range */ int (*zero_page_range)(struct dax_device *, pgoff_t, size_t); }; extern struct attribute_group dax_attribute_group; #if IS_ENABLED(CONFIG_DAX) struct dax_device *dax_get_by_host(const char *host); struct dax_device *alloc_dax(void *private, const char *host, const struct dax_operations *ops, unsigned long flags); void put_dax(struct dax_device *dax_dev); void kill_dax(struct dax_device *dax_dev); void dax_write_cache(struct dax_device *dax_dev, bool wc); bool dax_write_cache_enabled(struct dax_device *dax_dev); bool __dax_synchronous(struct dax_device *dax_dev); static inline bool dax_synchronous(struct dax_device *dax_dev) { return __dax_synchronous(dax_dev); } void __set_dax_synchronous(struct dax_device *dax_dev); static inline void set_dax_synchronous(struct dax_device *dax_dev) { __set_dax_synchronous(dax_dev); } bool dax_supported(struct dax_device *dax_dev, struct block_device *bdev, int blocksize, sector_t start, sector_t len); /* * Check if given mapping is supported by the file / underlying device. */ static inline bool daxdev_mapping_supported(struct vm_area_struct *vma, struct dax_device *dax_dev) { if (!(vma->vm_flags & VM_SYNC)) return true; if (!IS_DAX(file_inode(vma->vm_file))) return false; return dax_synchronous(dax_dev); } #else static inline struct dax_device *dax_get_by_host(const char *host) { return NULL; } static inline struct dax_device *alloc_dax(void *private, const char *host, const struct dax_operations *ops, unsigned long flags) { /* * Callers should check IS_ENABLED(CONFIG_DAX) to know if this * NULL is an error or expected. */ return NULL; } static inline void put_dax(struct dax_device *dax_dev) { } static inline void kill_dax(struct dax_device *dax_dev) { } static inline void dax_write_cache(struct dax_device *dax_dev, bool wc) { } static inline bool dax_write_cache_enabled(struct dax_device *dax_dev) { return false; } static inline bool dax_synchronous(struct dax_device *dax_dev) { return true; } static inline void set_dax_synchronous(struct dax_device *dax_dev) { } static inline bool dax_supported(struct dax_device *dax_dev, struct block_device *bdev, int blocksize, sector_t start, sector_t len) { return false; } static inline bool daxdev_mapping_supported(struct vm_area_struct *vma, struct dax_device *dax_dev) { return !(vma->vm_flags & VM_SYNC); } #endif struct writeback_control; int bdev_dax_pgoff(struct block_device *, sector_t, size_t, pgoff_t *pgoff); #if IS_ENABLED(CONFIG_FS_DAX) bool __bdev_dax_supported(struct block_device *bdev, int blocksize); static inline bool bdev_dax_supported(struct block_device *bdev, int blocksize) { return __bdev_dax_supported(bdev, blocksize); } bool __generic_fsdax_supported(struct dax_device *dax_dev, struct block_device *bdev, int blocksize, sector_t start, sector_t sectors); static inline bool generic_fsdax_supported(struct dax_device *dax_dev, struct block_device *bdev, int blocksize, sector_t start, sector_t sectors) { return __generic_fsdax_supported(dax_dev, bdev, blocksize, start, sectors); } static inline void fs_put_dax(struct dax_device *dax_dev) { put_dax(dax_dev); } struct dax_device *fs_dax_get_by_bdev(struct block_device *bdev); int dax_writeback_mapping_range(struct address_space *mapping, struct dax_device *dax_dev, struct writeback_control *wbc); struct page *dax_layout_busy_page(struct address_space *mapping); struct page *dax_layout_busy_page_range(struct address_space *mapping, loff_t start, loff_t end); dax_entry_t dax_lock_page(struct page *page); void dax_unlock_page(struct page *page, dax_entry_t cookie); #else static inline bool bdev_dax_supported(struct block_device *bdev, int blocksize) { return false; } static inline bool generic_fsdax_supported(struct dax_device *dax_dev, struct block_device *bdev, int blocksize, sector_t start, sector_t sectors) { return false; } static inline void fs_put_dax(struct dax_device *dax_dev) { } static inline struct dax_device *fs_dax_get_by_bdev(struct block_device *bdev) { return NULL; } static inline struct page *dax_layout_busy_page(struct address_space *mapping) { return NULL; } static inline struct page *dax_layout_busy_page_range(struct address_space *mapping, pgoff_t start, pgoff_t nr_pages) { return NULL; } static inline int dax_writeback_mapping_range(struct address_space *mapping, struct dax_device *dax_dev, struct writeback_control *wbc) { return -EOPNOTSUPP; } static inline dax_entry_t dax_lock_page(struct page *page) { if (IS_DAX(page->mapping->host)) return ~0UL; return 0; } static inline void dax_unlock_page(struct page *page, dax_entry_t cookie) { } #endif #if IS_ENABLED(CONFIG_DAX) int dax_read_lock(void); void dax_read_unlock(int id); #else static inline int dax_read_lock(void) { return 0; } static inline void dax_read_unlock(int id) { } #endif /* CONFIG_DAX */ bool dax_alive(struct dax_device *dax_dev); void *dax_get_private(struct dax_device *dax_dev); long dax_direct_access(struct dax_device *dax_dev, pgoff_t pgoff, long nr_pages, void **kaddr, pfn_t *pfn); size_t dax_copy_from_iter(struct dax_device *dax_dev, pgoff_t pgoff, void *addr, size_t bytes, struct iov_iter *i); size_t dax_copy_to_iter(struct dax_device *dax_dev, pgoff_t pgoff, void *addr, size_t bytes, struct iov_iter *i); int dax_zero_page_range(struct dax_device *dax_dev, pgoff_t pgoff, size_t nr_pages); void dax_flush(struct dax_device *dax_dev, void *addr, size_t size); ssize_t dax_iomap_rw(struct kiocb *iocb, struct iov_iter *iter, const struct iomap_ops *ops); vm_fault_t dax_iomap_fault(struct vm_fault *vmf, enum page_entry_size pe_size, pfn_t *pfnp, int *errp, const struct iomap_ops *ops); vm_fault_t dax_finish_sync_fault(struct vm_fault *vmf, enum page_entry_size pe_size, pfn_t pfn); int dax_delete_mapping_entry(struct address_space *mapping, pgoff_t index); int dax_invalidate_mapping_entry_sync(struct address_space *mapping, pgoff_t index); s64 dax_iomap_zero(loff_t pos, u64 length, struct iomap *iomap); static inline bool dax_mapping(struct address_space *mapping) { return mapping->host && IS_DAX(mapping->host); } #ifdef CONFIG_DEV_DAX_HMEM_DEVICES void hmem_register_device(int target_nid, struct resource *r); #else static inline void hmem_register_device(int target_nid, struct resource *r) { } #endif #endif
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 /* SPDX-License-Identifier: GPL-2.0 */ #ifndef __LINUX_RTNETLINK_H #define __LINUX_RTNETLINK_H #include <linux/mutex.h> #include <linux/netdevice.h> #include <linux/wait.h> #include <linux/refcount.h> #include <uapi/linux/rtnetlink.h> extern int rtnetlink_send(struct sk_buff *skb, struct net *net, u32 pid, u32 group, int echo); extern int rtnl_unicast(struct sk_buff *skb, struct net *net, u32 pid); extern void rtnl_notify(struct sk_buff *skb, struct net *net, u32 pid, u32 group, struct nlmsghdr *nlh, gfp_t flags); extern void rtnl_set_sk_err(struct net *net, u32 group, int error); extern int rtnetlink_put_metrics(struct sk_buff *skb, u32 *metrics); extern int rtnl_put_cacheinfo(struct sk_buff *skb, struct dst_entry *dst, u32 id, long expires, u32 error); void rtmsg_ifinfo(int type, struct net_device *dev, unsigned change, gfp_t flags); void rtmsg_ifinfo_newnet(int type, struct net_device *dev, unsigned int change, gfp_t flags, int *new_nsid, int new_ifindex); struct sk_buff *rtmsg_ifinfo_build_skb(int type, struct net_device *dev, unsigned change, u32 event, gfp_t flags, int *new_nsid, int new_ifindex); void rtmsg_ifinfo_send(struct sk_buff *skb, struct net_device *dev, gfp_t flags); /* RTNL is used as a global lock for all changes to network configuration */ extern void rtnl_lock(void); extern void rtnl_unlock(void); extern int rtnl_trylock(void); extern int rtnl_is_locked(void); extern int rtnl_lock_killable(void); extern bool refcount_dec_and_rtnl_lock(refcount_t *r); extern wait_queue_head_t netdev_unregistering_wq; extern struct rw_semaphore pernet_ops_rwsem; extern struct rw_semaphore net_rwsem; #ifdef CONFIG_PROVE_LOCKING extern bool lockdep_rtnl_is_held(void); #else static inline bool lockdep_rtnl_is_held(void) { return true; } #endif /* #ifdef CONFIG_PROVE_LOCKING */ /** * rcu_dereference_rtnl - rcu_dereference with debug checking * @p: The pointer to read, prior to dereferencing * * Do an rcu_dereference(p), but check caller either holds rcu_read_lock() * or RTNL. Note : Please prefer rtnl_dereference() or rcu_dereference() */ #define rcu_dereference_rtnl(p) \ rcu_dereference_check(p, lockdep_rtnl_is_held()) /** * rcu_dereference_bh_rtnl - rcu_dereference_bh with debug checking * @p: The pointer to read, prior to dereference * * Do an rcu_dereference_bh(p), but check caller either holds rcu_read_lock_bh() * or RTNL. Note : Please prefer rtnl_dereference() or rcu_dereference_bh() */ #define rcu_dereference_bh_rtnl(p) \ rcu_dereference_bh_check(p, lockdep_rtnl_is_held()) /** * rtnl_dereference - fetch RCU pointer when updates are prevented by RTNL * @p: The pointer to read, prior to dereferencing * * Return the value of the specified RCU-protected pointer, but omit * the READ_ONCE(), because caller holds RTNL. */ #define rtnl_dereference(p) \ rcu_dereference_protected(p, lockdep_rtnl_is_held()) static inline struct netdev_queue *dev_ingress_queue(struct net_device *dev) { return rtnl_dereference(dev->ingress_queue); } static inline struct netdev_queue *dev_ingress_queue_rcu(struct net_device *dev) { return rcu_dereference(dev->ingress_queue); } struct netdev_queue *dev_ingress_queue_create(struct net_device *dev); #ifdef CONFIG_NET_INGRESS void net_inc_ingress_queue(void); void net_dec_ingress_queue(void); #endif #ifdef CONFIG_NET_EGRESS void net_inc_egress_queue(void); void net_dec_egress_queue(void); #endif void rtnetlink_init(void); void __rtnl_unlock(void); void rtnl_kfree_skbs(struct sk_buff *head, struct sk_buff *tail); #define ASSERT_RTNL() \ WARN_ONCE(!rtnl_is_locked(), \ "RTNL: assertion failed at %s (%d)\n", __FILE__, __LINE__) extern int ndo_dflt_fdb_dump(struct sk_buff *skb, struct netlink_callback *cb, struct net_device *dev, struct net_device *filter_dev, int *idx); extern int ndo_dflt_fdb_add(struct ndmsg *ndm, struct nlattr *tb[], struct net_device *dev, const unsigned char *addr, u16 vid, u16 flags); extern int ndo_dflt_fdb_del(struct ndmsg *ndm, struct nlattr *tb[], struct net_device *dev, const unsigned char *addr, u16 vid); extern int ndo_dflt_bridge_getlink(struct sk_buff *skb, u32 pid, u32 seq, struct net_device *dev, u16 mode, u32 flags, u32 mask, int nlflags, u32 filter_mask, int (*vlan_fill)(struct sk_buff *skb, struct net_device *dev, u32 filter_mask)); #endif /* __LINUX_RTNETLINK_H */
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 /* SPDX-License-Identifier: GPL-2.0 */ #ifndef __NET_LWTUNNEL_H #define __NET_LWTUNNEL_H 1 #include <linux/lwtunnel.h> #include <linux/netdevice.h> #include <linux/skbuff.h> #include <linux/types.h> #include <net/route.h> #define LWTUNNEL_HASH_BITS 7 #define LWTUNNEL_HASH_SIZE (1 << LWTUNNEL_HASH_BITS) /* lw tunnel state flags */ #define LWTUNNEL_STATE_OUTPUT_REDIRECT BIT(0) #define LWTUNNEL_STATE_INPUT_REDIRECT BIT(1) #define LWTUNNEL_STATE_XMIT_REDIRECT BIT(2) enum { LWTUNNEL_XMIT_DONE, LWTUNNEL_XMIT_CONTINUE, }; struct lwtunnel_state { __u16 type; __u16 flags; __u16 headroom; atomic_t refcnt; int (*orig_output)(struct net *net, struct sock *sk, struct sk_buff *skb); int (*orig_input)(struct sk_buff *); struct rcu_head rcu; __u8 data[]; }; struct lwtunnel_encap_ops { int (*build_state)(struct net *net, struct nlattr *encap, unsigned int family, const void *cfg, struct lwtunnel_state **ts, struct netlink_ext_ack *extack); void (*destroy_state)(struct lwtunnel_state *lws); int (*output)(struct net *net, struct sock *sk, struct sk_buff *skb); int (*input)(struct sk_buff *skb); int (*fill_encap)(struct sk_buff *skb, struct lwtunnel_state *lwtstate); int (*get_encap_size)(struct lwtunnel_state *lwtstate); int (*cmp_encap)(struct lwtunnel_state *a, struct lwtunnel_state *b); int (*xmit)(struct sk_buff *skb); struct module *owner; }; #ifdef CONFIG_LWTUNNEL void lwtstate_free(struct lwtunnel_state *lws); static inline struct lwtunnel_state * lwtstate_get(struct lwtunnel_state *lws) { if (lws) atomic_inc(&lws->refcnt); return lws; } static inline void lwtstate_put(struct lwtunnel_state *lws) { if (!lws) return; if (atomic_dec_and_test(&lws->refcnt)) lwtstate_free(lws); } static inline bool lwtunnel_output_redirect(struct lwtunnel_state *lwtstate) { if (lwtstate && (lwtstate->flags & LWTUNNEL_STATE_OUTPUT_REDIRECT)) return true; return false; } static inline bool lwtunnel_input_redirect(struct lwtunnel_state *lwtstate) { if (lwtstate && (lwtstate->flags & LWTUNNEL_STATE_INPUT_REDIRECT)) return true; return false; } static inline bool lwtunnel_xmit_redirect(struct lwtunnel_state *lwtstate) { if (lwtstate && (lwtstate->flags & LWTUNNEL_STATE_XMIT_REDIRECT)) return true; return false; } static inline unsigned int lwtunnel_headroom(struct lwtunnel_state *lwtstate, unsigned int mtu) { if ((lwtunnel_xmit_redirect(lwtstate) || lwtunnel_output_redirect(lwtstate)) && lwtstate->headroom < mtu) return lwtstate->headroom; return 0; } int lwtunnel_encap_add_ops(const struct lwtunnel_encap_ops *op, unsigned int num); int lwtunnel_encap_del_ops(const struct lwtunnel_encap_ops *op, unsigned int num); int lwtunnel_valid_encap_type(u16 encap_type, struct netlink_ext_ack *extack); int lwtunnel_valid_encap_type_attr(struct nlattr *attr, int len, struct netlink_ext_ack *extack); int lwtunnel_build_state(struct net *net, u16 encap_type, struct nlattr *encap, unsigned int family, const void *cfg, struct lwtunnel_state **lws, struct netlink_ext_ack *extack); int lwtunnel_fill_encap(struct sk_buff *skb, struct lwtunnel_state *lwtstate, int encap_attr, int encap_type_attr); int lwtunnel_get_encap_size(struct lwtunnel_state *lwtstate); struct lwtunnel_state *lwtunnel_state_alloc(int hdr_len); int lwtunnel_cmp_encap(struct lwtunnel_state *a, struct lwtunnel_state *b); int lwtunnel_output(struct net *net, struct sock *sk, struct sk_buff *skb); int lwtunnel_input(struct sk_buff *skb); int lwtunnel_xmit(struct sk_buff *skb); int bpf_lwt_push_ip_encap(struct sk_buff *skb, void *hdr, u32 len, bool ingress); static inline void lwtunnel_set_redirect(struct dst_entry *dst) { if (lwtunnel_output_redirect(dst->lwtstate)) { dst->lwtstate->orig_output = dst->output; dst->output = lwtunnel_output; } if (lwtunnel_input_redirect(dst->lwtstate)) { dst->lwtstate->orig_input = dst->input; dst->input = lwtunnel_input; } } #else static inline void lwtstate_free(struct lwtunnel_state *lws) { } static inline struct lwtunnel_state * lwtstate_get(struct lwtunnel_state *lws) { return lws; } static inline void lwtstate_put(struct lwtunnel_state *lws) { } static inline bool lwtunnel_output_redirect(struct lwtunnel_state *lwtstate) { return false; } static inline bool lwtunnel_input_redirect(struct lwtunnel_state *lwtstate) { return false; } static inline bool lwtunnel_xmit_redirect(struct lwtunnel_state *lwtstate) { return false; } static inline void lwtunnel_set_redirect(struct dst_entry *dst) { } static inline unsigned int lwtunnel_headroom(struct lwtunnel_state *lwtstate, unsigned int mtu) { return 0; } static inline int lwtunnel_encap_add_ops(const struct lwtunnel_encap_ops *op, unsigned int num) { return -EOPNOTSUPP; } static inline int lwtunnel_encap_del_ops(const struct lwtunnel_encap_ops *op, unsigned int num) { return -EOPNOTSUPP; } static inline int lwtunnel_valid_encap_type(u16 encap_type, struct netlink_ext_ack *extack) { NL_SET_ERR_MSG(extack, "CONFIG_LWTUNNEL is not enabled in this kernel"); return -EOPNOTSUPP; } static inline int lwtunnel_valid_encap_type_attr(struct nlattr *attr, int len, struct netlink_ext_ack *extack) { /* return 0 since we are not walking attr looking for * RTA_ENCAP_TYPE attribute on nexthops. */ return 0; } static inline int lwtunnel_build_state(struct net *net, u16 encap_type, struct nlattr *encap, unsigned int family, const void *cfg, struct lwtunnel_state **lws, struct netlink_ext_ack *extack) { return -EOPNOTSUPP; } static inline int lwtunnel_fill_encap(struct sk_buff *skb, struct lwtunnel_state *lwtstate, int encap_attr, int encap_type_attr) { return 0; } static inline int lwtunnel_get_encap_size(struct lwtunnel_state *lwtstate) { return 0; } static inline struct lwtunnel_state *lwtunnel_state_alloc(int hdr_len) { return NULL; } static inline int lwtunnel_cmp_encap(struct lwtunnel_state *a, struct lwtunnel_state *b) { return 0; } static inline int lwtunnel_output(struct net *net, struct sock *sk, struct sk_buff *skb) { return -EOPNOTSUPP; } static inline int lwtunnel_input(struct sk_buff *skb) { return -EOPNOTSUPP; } static inline int lwtunnel_xmit(struct sk_buff *skb) { return -EOPNOTSUPP; } #endif /* CONFIG_LWTUNNEL */ #define MODULE_ALIAS_RTNL_LWT(encap_type) MODULE_ALIAS("rtnl-lwt-" __stringify(encap_type)) #endif /* __NET_LWTUNNEL_H */
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 /* SPDX-License-Identifier: GPL-2.0 */ #undef TRACE_SYSTEM #define TRACE_SYSTEM vsyscall #if !defined(__VSYSCALL_TRACE_H) || defined(TRACE_HEADER_MULTI_READ) #define __VSYSCALL_TRACE_H #include <linux/tracepoint.h> TRACE_EVENT(emulate_vsyscall, TP_PROTO(int nr), TP_ARGS(nr), TP_STRUCT__entry(__field(int, nr)), TP_fast_assign( __entry->nr = nr; ), TP_printk("nr = %d", __entry->nr) ); #endif #undef TRACE_INCLUDE_PATH #define TRACE_INCLUDE_PATH ../../arch/x86/entry/vsyscall/ #define TRACE_INCLUDE_FILE vsyscall_trace #include <trace/define_trace.h>
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946 947 948 949 950 951 952 953 954 955 956 957 958 959 960 961 962 963 964 965 966 967 968 969 970 971 972 973 974 975 976 977 978 979 980 981 982 983 984 985 986 987 988 989 990 991 992 993 994 995 996 997 998 999 1000 1001 1002 1003 1004 1005 1006 1007 1008 1009 1010 1011 1012 1013 1014 1015 1016 1017 1018 1019 1020 1021 1022 1023 1024 1025 1026 1027 1028 /* SPDX-License-Identifier: GPL-2.0 */ #ifndef _LINUX_LIST_H #define _LINUX_LIST_H #include <linux/types.h> #include <linux/stddef.h> #include <linux/poison.h> #include <linux/const.h> #include <linux/kernel.h> /* * Simple doubly linked list implementation. * * Some of the internal functions ("__xxx") are useful when * manipulating whole lists rather than single entries, as * sometimes we already know the next/prev entries and we can * generate better code by using them directly rather than * using the generic single-entry routines. */ #define LIST_HEAD_INIT(name) { &(name), &(name) } #define LIST_HEAD(name) \ struct list_head name = LIST_HEAD_INIT(name) /** * INIT_LIST_HEAD - Initialize a list_head structure * @list: list_head structure to be initialized. * * Initializes the list_head to point to itself. If it is a list header, * the result is an empty list. */ static inline void INIT_LIST_HEAD(struct list_head *list) { WRITE_ONCE(list->next, list); list->prev = list; } #ifdef CONFIG_DEBUG_LIST extern bool __list_add_valid(struct list_head *new, struct list_head *prev, struct list_head *next); extern bool __list_del_entry_valid(struct list_head *entry); #else static inline bool __list_add_valid(struct list_head *new, struct list_head *prev, struct list_head *next) { return true; } static inline bool __list_del_entry_valid(struct list_head *entry) { return true; } #endif /* * Insert a new entry between two known consecutive entries. * * This is only for internal list manipulation where we know * the prev/next entries already! */ static inline void __list_add(struct list_head *new, struct list_head *prev, struct list_head *next) { if (!__list_add_valid(new, prev, next)) return; next->prev = new; new->next = next; new->prev = prev; WRITE_ONCE(prev->next, new); } /** * list_add - add a new entry * @new: new entry to be added * @head: list head to add it after * * Insert a new entry after the specified head. * This is good for implementing stacks. */ static inline void list_add(struct list_head *new, struct list_head *head) { __list_add(new, head, head->next); } /** * list_add_tail - add a new entry * @new: new entry to be added * @head: list head to add it before * * Insert a new entry before the specified head. * This is useful for implementing queues. */ static inline void list_add_tail(struct list_head *new, struct list_head *head) { __list_add(new, head->prev, head); } /* * Delete a list entry by making the prev/next entries * point to each other. * * This is only for internal list manipulation where we know * the prev/next entries already! */ static inline void __list_del(struct list_head * prev, struct list_head * next) { next->prev = prev; WRITE_ONCE(prev->next, next); } /* * Delete a list entry and clear the 'prev' pointer. * * This is a special-purpose list clearing method used in the networking code * for lists allocated as per-cpu, where we don't want to incur the extra * WRITE_ONCE() overhead of a regular list_del_init(). The code that uses this * needs to check the node 'prev' pointer instead of calling list_empty(). */ static inline void __list_del_clearprev(struct list_head *entry) { __list_del(entry->prev, entry->next); entry->prev = NULL; } static inline void __list_del_entry(struct list_head *entry) { if (!__list_del_entry_valid(entry)) return; __list_del(entry->prev, entry->next); } /** * list_del - deletes entry from list. * @entry: the element to delete from the list. * Note: list_empty() on entry does not return true after this, the entry is * in an undefined state. */ static inline void list_del(struct list_head *entry) { __list_del_entry(entry); entry->next = LIST_POISON1; entry->prev = LIST_POISON2; } /** * list_replace - replace old entry by new one * @old : the element to be replaced * @new : the new element to insert * * If @old was empty, it will be overwritten. */ static inline void list_replace(struct list_head *old, struct list_head *new) { new->next = old->next; new->next->prev = new; new->prev = old->prev; new->prev->next = new; } /** * list_replace_init - replace old entry by new one and initialize the old one * @old : the element to be replaced * @new : the new element to insert * * If @old was empty, it will be overwritten. */ static inline void list_replace_init(struct list_head *old, struct list_head *new) { list_replace(old, new); INIT_LIST_HEAD(old); } /** * list_swap - replace entry1 with entry2 and re-add entry1 at entry2's position * @entry1: the location to place entry2 * @entry2: the location to place entry1 */ static inline void list_swap(struct list_head *entry1, struct list_head *entry2) { struct list_head *pos = entry2->prev; list_del(entry2); list_replace(entry1, entry2); if (pos == entry1) pos = entry2; list_add(entry1, pos); } /** * list_del_init - deletes entry from list and reinitialize it. * @entry: the element to delete from the list. */ static inline void list_del_init(struct list_head *entry) { __list_del_entry(entry); INIT_LIST_HEAD(entry); } /** * list_move - delete from one list and add as another's head * @list: the entry to move * @head: the head that will precede our entry */ static inline void list_move(struct list_head *list, struct list_head *head) { __list_del_entry(list); list_add(list, head); } /** * list_move_tail - delete from one list and add as another's tail * @list: the entry to move * @head: the head that will follow our entry */ static inline void list_move_tail(struct list_head *list, struct list_head *head) { __list_del_entry(list); list_add_tail(list, head); } /** * list_bulk_move_tail - move a subsection of a list to its tail * @head: the head that will follow our entry * @first: first entry to move * @last: last entry to move, can be the same as first * * Move all entries between @first and including @last before @head. * All three entries must belong to the same linked list. */ static inline void list_bulk_move_tail(struct list_head *head, struct list_head *first, struct list_head *last) { first->prev->next = last->next; last->next->prev = first->prev; head->prev->next = first; first->prev = head->prev; last->next = head; head->prev = last; } /** * list_is_first -- tests whether @list is the first entry in list @head * @list: the entry to test * @head: the head of the list */ static inline int list_is_first(const struct list_head *list, const struct list_head *head) { return list->prev == head; } /** * list_is_last - tests whether @list is the last entry in list @head * @list: the entry to test * @head: the head of the list */ static inline int list_is_last(const struct list_head *list, const struct list_head *head) { return list->next == head; } /** * list_empty - tests whether a list is empty * @head: the list to test. */ static inline int list_empty(const struct list_head *head) { return READ_ONCE(head->next) == head; } /** * list_del_init_careful - deletes entry from list and reinitialize it. * @entry: the element to delete from the list. * * This is the same as list_del_init(), except designed to be used * together with list_empty_careful() in a way to guarantee ordering * of other memory operations. * * Any memory operations done before a list_del_init_careful() are * guaranteed to be visible after a list_empty_careful() test. */ static inline void list_del_init_careful(struct list_head *entry) { __list_del_entry(entry); entry->prev = entry; smp_store_release(&entry->next, entry); } /** * list_empty_careful - tests whether a list is empty and not being modified * @head: the list to test * * Description: * tests whether a list is empty _and_ checks that no other CPU might be * in the process of modifying either member (next or prev) * * NOTE: using list_empty_careful() without synchronization * can only be safe if the only activity that can happen * to the list entry is list_del_init(). Eg. it cannot be used * if another CPU could re-list_add() it. */ static inline int list_empty_careful(const struct list_head *head) { struct list_head *next = smp_load_acquire(&head->next); return (next == head) && (next == head->prev); } /** * list_rotate_left - rotate the list to the left * @head: the head of the list */ static inline void list_rotate_left(struct list_head *head) { struct list_head *first; if (!list_empty(head)) { first = head->next; list_move_tail(first, head); } } /** * list_rotate_to_front() - Rotate list to specific item. * @list: The desired new front of the list. * @head: The head of the list. * * Rotates list so that @list becomes the new front of the list. */ static inline void list_rotate_to_front(struct list_head *list, struct list_head *head) { /* * Deletes the list head from the list denoted by @head and * places it as the tail of @list, this effectively rotates the * list so that @list is at the front. */ list_move_tail(head, list); } /** * list_is_singular - tests whether a list has just one entry. * @head: the list to test. */ static inline int list_is_singular(const struct list_head *head) { return !list_empty(head) && (head->next == head->prev); } static inline void __list_cut_position(struct list_head *list, struct list_head *head, struct list_head *entry) { struct list_head *new_first = entry->next; list->next = head->next; list->next->prev = list; list->prev = entry; entry->next = list; head->next = new_first; new_first->prev = head; } /** * list_cut_position - cut a list into two * @list: a new list to add all removed entries * @head: a list with entries * @entry: an entry within head, could be the head itself * and if so we won't cut the list * * This helper moves the initial part of @head, up to and * including @entry, from @head to @list. You should * pass on @entry an element you know is on @head. @list * should be an empty list or a list you do not care about * losing its data. * */ static inline void list_cut_position(struct list_head *list, struct list_head *head, struct list_head *entry) { if (list_empty(head)) return; if (list_is_singular(head) && (head->next != entry && head != entry)) return; if (entry == head) INIT_LIST_HEAD(list); else __list_cut_position(list, head, entry); } /** * list_cut_before - cut a list into two, before given entry * @list: a new list to add all removed entries * @head: a list with entries * @entry: an entry within head, could be the head itself * * This helper moves the initial part of @head, up to but * excluding @entry, from @head to @list. You should pass * in @entry an element you know is on @head. @list should * be an empty list or a list you do not care about losing * its data. * If @entry == @head, all entries on @head are moved to * @list. */ static inline void list_cut_before(struct list_head *list, struct list_head *head, struct list_head *entry) { if (head->next == entry) { INIT_LIST_HEAD(list); return; } list->next = head->next; list->next->prev = list; list->prev = entry->prev; list->prev->next = list; head->next = entry; entry->prev = head; } static inline void __list_splice(const struct list_head *list, struct list_head *prev, struct list_head *next) { struct list_head *first = list->next; struct list_head *last = list->prev; first->prev = prev; prev->next = first; last->next = next; next->prev = last; } /** * list_splice - join two lists, this is designed for stacks * @list: the new list to add. * @head: the place to add it in the first list. */ static inline void list_splice(const struct list_head *list, struct list_head *head) { if (!list_empty(list)) __list_splice(list, head, head->next); } /** * list_splice_tail - join two lists, each list being a queue * @list: the new list to add. * @head: the place to add it in the first list. */ static inline void list_splice_tail(struct list_head *list, struct list_head *head) { if (!list_empty(list)) __list_splice(list, head->prev, head); } /** * list_splice_init - join two lists and reinitialise the emptied list. * @list: the new list to add. * @head: the place to add it in the first list. * * The list at @list is reinitialised */ static inline void list_splice_init(struct list_head *list, struct list_head *head) { if (!list_empty(list)) { __list_splice(list, head, head->next); INIT_LIST_HEAD(list); } } /** * list_splice_tail_init - join two lists and reinitialise the emptied list * @list: the new list to add. * @head: the place to add it in the first list. * * Each of the lists is a queue. * The list at @list is reinitialised */ static inline void list_splice_tail_init(struct list_head *list, struct list_head *head) { if (!list_empty(list)) { __list_splice(list, head->prev, head); INIT_LIST_HEAD(list); } } /** * list_entry - get the struct for this entry * @ptr: the &struct list_head pointer. * @type: the type of the struct this is embedded in. * @member: the name of the list_head within the struct. */ #define list_entry(ptr, type, member) \ container_of(ptr, type, member) /** * list_first_entry - get the first element from a list * @ptr: the list head to take the element from. * @type: the type of the struct this is embedded in. * @member: the name of the list_head within the struct. * * Note, that list is expected to be not empty. */ #define list_first_entry(ptr, type, member) \ list_entry((ptr)->next, type, member) /** * list_last_entry - get the last element from a list * @ptr: the list head to take the element from. * @type: the type of the struct this is embedded in. * @member: the name of the list_head within the struct. * * Note, that list is expected to be not empty. */ #define list_last_entry(ptr, type, member) \ list_entry((ptr)->prev, type, member) /** * list_first_entry_or_null - get the first element from a list * @ptr: the list head to take the element from. * @type: the type of the struct this is embedded in. * @member: the name of the list_head within the struct. * * Note that if the list is empty, it returns NULL. */ #define list_first_entry_or_null(ptr, type, member) ({ \ struct list_head *head__ = (ptr); \ struct list_head *pos__ = READ_ONCE(head__->next); \ pos__ != head__ ? list_entry(pos__, type, member) : NULL; \ }) /** * list_next_entry - get the next element in list * @pos: the type * to cursor * @member: the name of the list_head within the struct. */ #define list_next_entry(pos, member) \ list_entry((pos)->member.next, typeof(*(pos)), member) /** * list_prev_entry - get the prev element in list * @pos: the type * to cursor * @member: the name of the list_head within the struct. */ #define list_prev_entry(pos, member) \ list_entry((pos)->member.prev, typeof(*(pos)), member) /** * list_for_each - iterate over a list * @pos: the &struct list_head to use as a loop cursor. * @head: the head for your list. */ #define list_for_each(pos, head) \ for (pos = (head)->next; pos != (head); pos = pos->next) /** * list_for_each_continue - continue iteration over a list * @pos: the &struct list_head to use as a loop cursor. * @head: the head for your list. * * Continue to iterate over a list, continuing after the current position. */ #define list_for_each_continue(pos, head) \ for (pos = pos->next; pos != (head); pos = pos->next) /** * list_for_each_prev - iterate over a list backwards * @pos: the &struct list_head to use as a loop cursor. * @head: the head for your list. */ #define list_for_each_prev(pos, head) \ for (pos = (head)->prev; pos != (head); pos = pos->prev) /** * list_for_each_safe - iterate over a list safe against removal of list entry * @pos: the &struct list_head to use as a loop cursor. * @n: another &struct list_head to use as temporary storage * @head: the head for your list. */ #define list_for_each_safe(pos, n, head) \ for (pos = (head)->next, n = pos->next; pos != (head); \ pos = n, n = pos->next) /** * list_for_each_prev_safe - iterate over a list backwards safe against removal of list entry * @pos: the &struct list_head to use as a loop cursor. * @n: another &struct list_head to use as temporary storage * @head: the head for your list. */ #de