1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946 947 948 949 950 951 952 953 954 955 956 957 958 959 960 961 962 963 964 965 966 967 968 969 970 971 972 973 974 975 976 977 978 979 980 981 982 983 984 985 986 987 988 989 990 991 992 993 994 995 996 997 998 999 1000 1001 1002 1003 1004 1005 1006 1007 1008 1009 1010 1011 1012 1013 1014 1015 1016 1017 1018 1019 1020 1021 1022 1023 1024 1025 1026 1027 1028 1029 1030 1031 1032 1033 1034 1035 1036 1037 1038 1039 1040 1041 1042 1043 1044 1045 1046 1047 1048 1049 1050 1051 1052 1053 1054 1055 1056 1057 1058 1059 1060 1061 1062 1063 1064 1065 1066 1067 1068 1069 1070 1071 1072 1073 1074 1075 1076 1077 1078 1079 1080 1081 1082 1083 1084 1085 1086 1087 1088 1089 1090 1091 1092 1093 1094 1095 1096 1097 1098 1099 1100 1101 1102 1103 1104 1105 1106 1107 1108 1109 1110 1111 1112 1113 1114 1115 1116 1117 1118 1119 1120 1121 1122 1123 1124 1125 1126 1127 1128 1129 1130 1131 1132 1133 1134 1135 1136 1137 1138 1139 1140 1141 1142 1143 1144 1145 1146 1147 1148 1149 1150 1151 1152 1153 1154 1155 1156 1157 1158 1159 1160 1161 1162 1163 1164 1165 1166 1167 1168 1169 1170 1171 1172 1173 1174 1175 1176 1177 1178 1179 1180 1181 1182 1183 1184 1185 1186 1187 1188 1189 1190 1191 1192 1193 1194 1195 1196 1197 1198 1199 1200 1201 1202 1203 1204 1205 1206 1207 1208 1209 1210 1211 1212 1213 1214 1215 1216 1217 1218 1219 1220 1221 1222 1223 1224 1225 1226 1227 1228 1229 1230 1231 1232 1233 1234 1235 1236 1237 1238 1239 1240 1241 1242 1243 1244 1245 1246 1247 1248 1249 1250 1251 1252 1253 1254 1255 1256 1257 1258 1259 1260 1261 1262 1263 1264 1265 1266 1267 1268 1269 1270 1271 1272 1273 1274 1275 1276 1277 1278 1279 1280 1281 1282 1283 1284 1285 1286 1287 1288 1289 1290 1291 1292 1293 1294 1295 1296 1297 1298 1299 1300 1301 1302 1303 1304 1305 1306 1307 1308 1309 1310 1311 1312 1313 1314 1315 1316 1317 1318 1319 1320 1321 1322 1323 1324 1325 1326 1327 1328 1329 1330 1331 1332 1333 1334 1335 1336 1337 1338 1339 1340 1341 1342 1343 1344 1345 1346 1347 1348 1349 1350 1351 1352 1353 1354 1355 1356 1357 1358 1359 1360 1361 1362 1363 1364 1365 1366 1367 1368 1369 1370 1371 1372 1373 1374 1375 1376 1377 1378 1379 1380 1381 1382 1383 1384 1385 1386 1387 1388 1389 1390 1391 1392 1393 1394 1395 1396 1397 1398 1399 1400 1401 1402 1403 1404 1405 1406 1407 1408 1409 1410 1411 1412 1413 1414 1415 1416 1417 1418 1419 1420 1421 1422 1423 1424 1425 1426 1427 1428 1429 1430 1431 1432 1433 1434 1435 1436 1437 1438 1439 1440 1441 1442 1443 1444 1445 1446 1447 1448 1449 1450 1451 1452 1453 1454 1455 1456 1457 1458 1459 1460 1461 1462 1463 1464 1465 1466 1467 1468 1469 1470 1471 1472 1473 1474 1475 1476 1477 1478 1479 1480 1481 1482 1483 1484 1485 1486 1487 1488 1489 1490 1491 1492 1493 1494 1495 1496 1497 1498 1499 1500 1501 1502 1503 1504 1505 1506 1507 1508 1509 1510 1511 1512 1513 1514 1515 1516 1517 1518 1519 1520 1521 1522 1523 1524 1525 1526 1527 1528 1529 1530 1531 1532 1533 1534 1535 1536 1537 1538 1539 1540 1541 1542 1543 1544 1545 1546 1547 1548 1549 1550 1551 1552 1553 1554 1555 1556 1557 1558 1559 1560 1561 1562 1563 1564 1565 1566 1567 1568 1569 1570 1571 1572 1573 1574 1575 1576 1577 1578 1579 1580 1581 1582 1583 1584 1585 1586 1587 1588 1589 1590 1591 1592 1593 1594 1595 1596 1597 1598 1599 1600 1601 1602 1603 1604 1605 1606 1607 1608 1609 1610 1611 1612 1613 1614 1615 1616 1617 1618 1619 1620 1621 1622 1623 1624 1625 1626 1627 1628 1629 1630 1631 1632 1633 1634 1635 1636 1637 1638 1639 1640 1641 1642 1643 1644 1645 1646 1647 1648 1649 1650 1651 1652 1653 1654 1655 1656 1657 1658 1659 1660 1661 1662 1663 1664 1665 1666 1667 1668 1669 1670 1671 1672 1673 1674 1675 1676 1677 1678 1679 1680 1681 1682 1683 1684 1685 1686 1687 1688 1689 1690 1691 1692 1693 1694 1695 1696 1697 1698 1699 1700 1701 1702 1703 1704 1705 1706 1707 1708 1709 1710 1711 1712 1713 1714 1715 1716 1717 1718 1719 1720 1721 1722 1723 1724 1725 1726 1727 1728 1729 1730 1731 1732 1733 1734 1735 1736 1737 1738 1739 1740 1741 1742 1743 1744 1745 1746 1747 1748 1749 1750 1751 1752 1753 1754 1755 1756 1757 1758 1759 1760 1761 1762 1763 1764 1765 1766 1767 1768 1769 1770 1771 1772 1773 1774 1775 1776 1777 1778 1779 1780 1781 1782 1783 1784 1785 1786 1787 1788 1789 1790 1791 1792 1793 1794 1795 1796 1797 1798 1799 1800 1801 1802 1803 1804 1805 1806 1807 1808 1809 1810 1811 1812 1813 1814 1815 1816 1817 1818 1819 1820 1821 1822 1823 1824 1825 1826 1827 1828 1829 1830 1831 1832 1833 1834 1835 1836 1837 1838 1839 1840 1841 1842 1843 1844 1845 1846 1847 1848 1849 1850 1851 1852 1853 1854 1855 1856 1857 1858 1859 1860 1861 1862 1863 1864 1865 1866 1867 1868 1869 1870 1871 1872 1873 1874 1875 1876 1877 1878 1879 1880 1881 1882 1883 1884 1885 1886 1887 1888 1889 1890 1891 1892 1893 1894 1895 1896 1897 1898 1899 1900 1901 1902 1903 1904 1905 1906 1907 1908 1909 1910 1911 1912 1913 1914 1915 1916 1917 1918 1919 1920 1921 1922 1923 1924 1925 1926 1927 1928 1929 1930 1931 1932 1933 1934 1935 1936 1937 1938 1939 1940 1941 1942 1943 1944 1945 1946 1947 1948 1949 1950 1951 1952 1953 1954 1955 1956 1957 1958 1959 1960 1961 1962 1963 1964 1965 1966 1967 1968 1969 1970 1971 1972 1973 1974 1975 1976 1977 1978 1979 1980 1981 1982 1983 1984 1985 1986 1987 1988 1989 1990 1991 1992 1993 1994 1995 1996 1997 1998 1999 2000 2001 2002 2003 2004 2005 2006 2007 2008 2009 2010 2011 2012 2013 2014 2015 2016 2017 2018 2019 2020 2021 2022 2023 2024 2025 2026 2027 2028 2029 2030 2031 2032 2033 2034 2035 2036 2037 2038 2039 2040 2041 2042 2043 2044 2045 2046 2047 2048 2049 2050 2051 2052 2053 2054 2055 2056 2057 2058 2059 2060 2061 2062 2063 2064 2065 2066 2067 2068 2069 2070 2071 2072 2073 2074 2075 2076 2077 2078 2079 2080 2081 2082 2083 2084 2085 2086 2087 2088 2089 2090 2091 2092 2093 2094 2095 2096 2097 2098 2099 2100 2101 2102 2103 2104 2105 2106 2107 2108 2109 2110 2111 2112 2113 2114 2115 2116 2117 2118 2119 2120 2121 2122 2123 2124 2125 2126 2127 2128 2129 2130 2131 2132 2133 2134 2135 2136 2137 2138 2139 2140 2141 2142 2143 2144 2145 2146 2147 2148 2149 2150 2151 2152 2153 2154 2155 2156 2157 2158 2159 2160 2161 2162 2163 2164 2165 2166 2167 2168 2169 2170 2171 2172 2173 2174 2175 2176 2177 2178 2179 2180 2181 2182 2183 2184 2185 2186 2187 2188 2189 2190 2191 2192 2193 2194 2195 2196 2197 2198 2199 2200 2201 2202 2203 2204 2205 2206 2207 2208 2209 2210 2211 2212 2213 2214 2215 2216 2217 2218 2219 2220 2221 2222 2223 2224 2225 2226 2227 2228 2229 2230 2231 2232 2233 2234 2235 2236 2237 2238 2239 2240 2241 2242 2243 2244 2245 2246 2247 2248 2249 2250 2251 2252 2253 2254 2255 2256 2257 2258 2259 2260 2261 2262 2263 2264 2265 2266 2267 2268 2269 2270 2271 2272 2273 2274 2275 2276 2277 2278 2279 2280 2281 2282 2283 2284 2285 2286 2287 2288 2289 2290 2291 2292 2293 2294 2295 2296 2297 2298 2299 2300 2301 2302 2303 2304 2305 2306 2307 2308 2309 2310 2311 2312 2313 2314 2315 2316 2317 2318 2319 2320 2321 2322 2323 2324 2325 2326 2327 2328 2329 2330 2331 2332 2333 2334 2335 2336 2337 2338 2339 2340 2341 2342 2343 2344 2345 2346 2347 2348 2349 2350 2351 2352 2353 2354 2355 2356 2357 2358 2359 2360 2361 2362 2363 2364 2365 2366 2367 2368 2369 2370 2371 2372 2373 2374 2375 2376 2377 2378 2379 2380 2381 2382 2383 2384 2385 2386 2387 2388 2389 2390 2391 2392 2393 2394 2395 2396 2397 2398 2399 2400 2401 2402 2403 2404 2405 2406 2407 2408 2409 2410 2411 2412 2413 2414 2415 2416 2417 2418 2419 2420 2421 2422 2423 2424 2425 2426 2427 2428 2429 2430 2431 2432 2433 2434 2435 2436 2437 2438 2439 2440 2441 2442 2443 2444 2445 2446 2447 2448 2449 2450 2451 2452 2453 2454 2455 2456 2457 2458 2459 2460 2461 2462 2463 2464 2465 2466 2467 2468 2469 2470 2471 2472 2473 2474 2475 2476 2477 2478 2479 2480 2481 2482 2483 2484 2485 2486 2487 2488 2489 2490 2491 2492 2493 2494 2495 2496 2497 2498 2499 2500 2501 2502 2503 2504 2505 2506 2507 2508 2509 2510 2511 2512 2513 2514 2515 2516 2517 2518 2519 2520 2521 2522 2523 2524 2525 2526 2527 2528 2529 2530 2531 2532 2533 2534 2535 2536 2537 2538 2539 2540 2541 2542 2543 2544 2545 2546 2547 2548 2549 2550 2551 2552 2553 2554 2555 2556 2557 2558 2559 2560 2561 2562 2563 2564 2565 2566 2567 2568 2569 2570 2571 2572 2573 2574 2575 2576 2577 2578 2579 2580 2581 2582 2583 2584 2585 2586 2587 2588 2589 2590 2591 2592 2593 2594 2595 2596 2597 2598 2599 2600 2601 2602 2603 2604 2605 2606 2607 2608 2609 2610 2611 2612 2613 2614 2615 2616 2617 2618 2619 2620 2621 2622 2623 2624 2625 2626 2627 2628 2629 2630 2631 2632 2633 2634 2635 2636 2637 2638 2639 2640 2641 2642 2643 2644 2645 2646 2647 2648 2649 2650 2651 2652 2653 2654 2655 2656 2657 2658 2659 2660 2661 2662 2663 2664 2665 2666 2667 2668 2669 2670 2671 2672 2673 2674 2675 2676 2677 2678 2679 2680 2681 2682 2683 2684 2685 2686 2687 2688 2689 2690 2691 2692 2693 2694 2695 2696 2697 2698 2699 2700 2701 2702 2703 2704 2705 2706 2707 2708 2709 2710 2711 2712 2713 2714 2715 2716 2717 2718 2719 2720 2721 2722 2723 2724 2725 2726 2727 2728 2729 2730 2731 2732 2733 2734 2735 2736 2737 2738 2739 2740 2741 2742 2743 2744 2745 2746 2747 2748 2749 2750 2751 2752 2753 2754 2755 2756 2757 2758 2759 2760 2761 2762 2763 2764 2765 2766 2767 2768 2769 2770 2771 2772 2773 2774 2775 2776 2777 2778 2779 2780 2781 2782 2783 2784 2785 2786 2787 2788 2789 2790 2791 2792 2793 2794 2795 2796 2797 2798 2799 2800 2801 2802 2803 2804 2805 2806 2807 2808 2809 2810 2811 2812 2813 2814 2815 2816 2817 2818 2819 2820 2821 2822 2823 2824 2825 2826 2827 2828 2829 2830 2831 2832 2833 2834 2835 2836 2837 2838 2839 2840 2841 2842 2843 2844 2845 2846 2847 2848 2849 2850 2851 2852 2853 2854 2855 2856 2857 2858 2859 2860 2861 2862 2863 2864 2865 2866 2867 2868 2869 2870 2871 2872 2873 2874 2875 2876 2877 2878 2879 2880 2881 2882 2883 2884 2885 2886 2887 2888 2889 2890 2891 2892 2893 2894 2895 2896 2897 2898 2899 2900 2901 2902 2903 2904 2905 2906 2907 2908 2909 2910 2911 2912 2913 2914 2915 2916 2917 2918 2919 2920 2921 2922 2923 2924 2925 2926 2927 2928 2929 2930 2931 2932 2933 2934 2935 2936 2937 2938 2939 2940 2941 2942 2943 2944 2945 2946 2947 2948 2949 2950 2951 2952 2953 2954 2955 2956 2957 2958 2959 2960 2961 2962 2963 2964 2965 2966 2967 2968 2969 2970 2971 2972 2973 2974 2975 2976 2977 2978 2979 2980 2981 2982 2983 2984 2985 2986 2987 2988 2989 2990 2991 2992 2993 2994 2995 2996 2997 2998 2999 3000 3001 3002 3003 3004 3005 3006 3007 3008 3009 3010 3011 3012 3013 3014 3015 3016 3017 3018 3019 3020 3021 3022 3023 3024 3025 3026 3027 3028 3029 3030 3031 3032 3033 3034 3035 3036 3037 3038 3039 3040 3041 3042 3043 3044 3045 3046 3047 3048 3049 3050 3051 3052 3053 3054 3055 3056 3057 3058 3059 3060 3061 3062 3063 3064 3065 3066 3067 3068 3069 3070 3071 3072 3073 3074 3075 3076 3077 3078 3079 3080 3081 3082 3083 3084 3085 3086 3087 3088 3089 3090 3091 3092 3093 3094 3095 3096 3097 3098 3099 3100 3101 3102 3103 3104 3105 3106 3107 3108 3109 3110 3111 3112 3113 3114 3115 3116 3117 3118 3119 3120 3121 3122 3123 3124 3125 3126 3127 3128 3129 3130 3131 3132 3133 3134 3135 3136 3137 3138 3139 3140 3141 3142 3143 3144 3145 3146 3147 3148 3149 3150 3151 3152 3153 3154 3155 3156 3157 3158 3159 3160 3161 3162 3163 3164 3165 3166 3167 3168 3169 3170 3171 3172 3173 3174 3175 3176 3177 3178 3179 3180 3181 3182 3183 3184 3185 3186 3187 3188 3189 3190 3191 3192 3193 3194 3195 3196 3197 3198 3199 3200 3201 3202 3203 3204 3205 3206 3207 3208 3209 3210 3211 3212 3213 3214 3215 3216 3217 3218 3219 3220 3221 3222 3223 3224 3225 3226 3227 3228 3229 3230 3231 3232 3233 3234 3235 3236 3237 3238 3239 3240 3241 3242 3243 3244 3245 3246 3247 3248 3249 3250 3251 3252 3253 3254 3255 3256 3257 3258 3259 3260 3261 3262 3263 3264 3265 3266 3267 3268 3269 3270 3271 3272 3273 3274 3275 3276 3277 3278 3279 3280 3281 3282 3283 3284 3285 3286 3287 3288 3289 3290 3291 3292 3293 3294 3295 3296 3297 3298 3299 3300 3301 3302 3303 3304 3305 3306 3307 3308 3309 3310 3311 3312 3313 3314 3315 3316 3317 3318 3319 3320 3321 3322 3323 3324 3325 3326 3327 3328 3329 3330 3331 3332 3333 3334 3335 3336 3337 3338 3339 3340 3341 3342 3343 3344 3345 3346 3347 3348 3349 3350 3351 3352 3353 3354 3355 3356 3357 3358 3359 3360 3361 3362 3363 3364 3365 3366 3367 3368 3369 3370 3371 3372 3373 3374 3375 3376 3377 3378 3379 3380 3381 3382 3383 3384 3385 3386 3387 3388 3389 3390 3391 3392 3393 3394 3395 3396 3397 3398 3399 3400 3401 3402 3403 3404 3405 3406 3407 3408 3409 3410 3411 3412 3413 3414 3415 3416 3417 3418 3419 3420 3421 3422 3423 3424 3425 3426 3427 3428 3429 3430 3431 3432 3433 3434 3435 3436 3437 3438 3439 3440 3441 3442 3443 3444 3445 3446 3447 3448 3449 3450 3451 3452 3453 3454 3455 3456 3457 3458 3459 3460 3461 3462 3463 3464 3465 3466 3467 3468 3469 3470 3471 3472 3473 3474 3475 3476 3477 3478 3479 3480 3481 3482 3483 3484 3485 3486 3487 3488 3489 3490 3491 3492 3493 3494 3495 3496 3497 3498 3499 3500 3501 3502 3503 3504 3505 3506 3507 3508 3509 3510 3511 3512 3513 3514 3515 3516 3517 3518 3519 3520 3521 3522 3523 3524 3525 3526 3527 3528 3529 3530 3531 3532 3533 3534 3535 3536 3537 3538 3539 3540 3541 3542 3543 3544 3545 3546 3547 3548 3549 3550 3551 3552 3553 3554 3555 3556 3557 3558 3559 3560 3561 3562 3563 3564 3565 3566 3567 3568 3569 3570 3571 3572 3573 3574 3575 3576 3577 3578 3579 3580 3581 3582 3583 3584 3585 3586 3587 3588 3589 3590 3591 3592 3593 3594 3595 3596 3597 3598 3599 3600 3601 3602 3603 3604 3605 3606 3607 3608 3609 3610 3611 3612 3613 3614 3615 3616 3617 3618 3619 3620 3621 3622 3623 3624 3625 3626 3627 3628 3629 3630 3631 3632 3633 3634 3635 3636 3637 3638 3639 3640 3641 3642 3643 3644 3645 3646 3647 3648 3649 3650 3651 3652 3653 3654 3655 3656 3657 3658 3659 3660 3661 3662 3663 3664 3665 3666 3667 3668 3669 3670 3671 3672 3673 3674 3675 3676 3677 3678 3679 3680 3681 3682 3683 3684 3685 3686 3687 3688 3689 3690 3691 3692 3693 3694 3695 3696 3697 3698 3699 3700 3701 3702 3703 3704 3705 3706 3707 3708 3709 3710 3711 3712 3713 3714 3715 3716 3717 3718 3719 3720 3721 3722 3723 3724 3725 3726 3727 3728 3729 3730 3731 3732 3733 3734 3735 3736 3737 3738 3739 3740 3741 3742 3743 3744 3745 3746 3747 3748 3749 3750 3751 3752 3753 3754 3755 3756 3757 3758 3759 3760 3761 3762 3763 3764 3765 3766 3767 3768 3769 3770 3771 3772 3773 3774 3775 3776 3777 3778 3779 3780 3781 3782 3783 3784 3785 3786 3787 3788 3789 3790 3791 3792 3793 3794 3795 3796 3797 3798 3799 3800 3801 3802 3803 3804 3805 3806 3807 3808 3809 3810 3811 3812 3813 3814 3815 3816 3817 3818 3819 3820 3821 3822 3823 3824 3825 3826 3827 3828 3829 3830 3831 3832 3833 3834 3835 3836 3837 3838 3839 // SPDX-License-Identifier: GPL-2.0-only /* * mm/mmap.c * * Written by obz. * * Address space accounting code <alan@lxorguk.ukuu.org.uk> */ #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt #include <linux/kernel.h> #include <linux/slab.h> #include <linux/backing-dev.h> #include <linux/mm.h> #include <linux/vmacache.h> #include <linux/shm.h> #include <linux/mman.h> #include <linux/pagemap.h> #include <linux/swap.h> #include <linux/syscalls.h> #include <linux/capability.h> #include <linux/init.h> #include <linux/file.h> #include <linux/fs.h> #include <linux/personality.h> #include <linux/security.h> #include <linux/hugetlb.h> #include <linux/shmem_fs.h> #include <linux/profile.h> #include <linux/export.h> #include <linux/mount.h> #include <linux/mempolicy.h> #include <linux/rmap.h> #include <linux/mmu_notifier.h> #include <linux/mmdebug.h> #include <linux/perf_event.h> #include <linux/audit.h> #include <linux/khugepaged.h> #include <linux/uprobes.h> #include <linux/rbtree_augmented.h> #include <linux/notifier.h> #include <linux/memory.h> #include <linux/printk.h> #include <linux/userfaultfd_k.h> #include <linux/moduleparam.h> #include <linux/pkeys.h> #include <linux/oom.h> #include <linux/sched/mm.h> #include <linux/uaccess.h> #include <asm/cacheflush.h> #include <asm/tlb.h> #include <asm/mmu_context.h> #define CREATE_TRACE_POINTS #include <trace/events/mmap.h> #include "internal.h" #ifndef arch_mmap_check #define arch_mmap_check(addr, len, flags) (0) #endif #ifdef CONFIG_HAVE_ARCH_MMAP_RND_BITS const int mmap_rnd_bits_min = CONFIG_ARCH_MMAP_RND_BITS_MIN; const int mmap_rnd_bits_max = CONFIG_ARCH_MMAP_RND_BITS_MAX; int mmap_rnd_bits __read_mostly = CONFIG_ARCH_MMAP_RND_BITS; #endif #ifdef CONFIG_HAVE_ARCH_MMAP_RND_COMPAT_BITS const int mmap_rnd_compat_bits_min = CONFIG_ARCH_MMAP_RND_COMPAT_BITS_MIN; const int mmap_rnd_compat_bits_max = CONFIG_ARCH_MMAP_RND_COMPAT_BITS_MAX; int mmap_rnd_compat_bits __read_mostly = CONFIG_ARCH_MMAP_RND_COMPAT_BITS; #endif static bool ignore_rlimit_data; core_param(ignore_rlimit_data, ignore_rlimit_data, bool, 0644); static void unmap_region(struct mm_struct *mm, struct vm_area_struct *vma, struct vm_area_struct *prev, unsigned long start, unsigned long end); /* description of effects of mapping type and prot in current implementation. * this is due to the limited x86 page protection hardware. The expected * behavior is in parens: * * map_type prot * PROT_NONE PROT_READ PROT_WRITE PROT_EXEC * MAP_SHARED r: (no) no r: (yes) yes r: (no) yes r: (no) yes * w: (no) no w: (no) no w: (yes) yes w: (no) no * x: (no) no x: (no) yes x: (no) yes x: (yes) yes * * MAP_PRIVATE r: (no) no r: (yes) yes r: (no) yes r: (no) yes * w: (no) no w: (no) no w: (copy) copy w: (no) no * x: (no) no x: (no) yes x: (no) yes x: (yes) yes */ pgprot_t protection_map[16] __ro_after_init = { __P000, __P001, __P010, __P011, __P100, __P101, __P110, __P111, __S000, __S001, __S010, __S011, __S100, __S101, __S110, __S111 }; #ifndef CONFIG_ARCH_HAS_FILTER_PGPROT static inline pgprot_t arch_filter_pgprot(pgprot_t prot) { return prot; } #endif pgprot_t vm_get_page_prot(unsigned long vm_flags) { pgprot_t ret = __pgprot(pgprot_val(protection_map[vm_flags & (VM_READ|VM_WRITE|VM_EXEC|VM_SHARED)]) | pgprot_val(arch_vm_get_page_prot(vm_flags))); return arch_filter_pgprot(ret); } EXPORT_SYMBOL(vm_get_page_prot); static pgprot_t vm_pgprot_modify(pgprot_t oldprot, unsigned long vm_flags) { return pgprot_modify(oldprot, vm_get_page_prot(vm_flags)); } /* Update vma->vm_page_prot to reflect vma->vm_flags. */ void vma_set_page_prot(struct vm_area_struct *vma) { unsigned long vm_flags = vma->vm_flags; pgprot_t vm_page_prot; vm_page_prot = vm_pgprot_modify(vma->vm_page_prot, vm_flags); if (vma_wants_writenotify(vma, vm_page_prot)) { vm_flags &= ~VM_SHARED; vm_page_prot = vm_pgprot_modify(vm_page_prot, vm_flags); } /* remove_protection_ptes reads vma->vm_page_prot without mmap_lock */ WRITE_ONCE(vma->vm_page_prot, vm_page_prot); } /* * Requires inode->i_mapping->i_mmap_rwsem */ static void __remove_shared_vm_struct(struct vm_area_struct *vma, struct file *file, struct address_space *mapping) { if (vma->vm_flags & VM_DENYWRITE) allow_write_access(file); if (vma->vm_flags & VM_SHARED) mapping_unmap_writable(mapping); flush_dcache_mmap_lock(mapping); vma_interval_tree_remove(vma, &mapping->i_mmap); flush_dcache_mmap_unlock(mapping); } /* * Unlink a file-based vm structure from its interval tree, to hide * vma from rmap and vmtruncate before freeing its page tables. */ void unlink_file_vma(struct vm_area_struct *vma) { struct file *file = vma->vm_file; if (file) { struct address_space *mapping = file->f_mapping; i_mmap_lock_write(mapping); __remove_shared_vm_struct(vma, file, mapping); i_mmap_unlock_write(mapping); } } /* * Close a vm structure and free it, returning the next. */ static struct vm_area_struct *remove_vma(struct vm_area_struct *vma) { struct vm_area_struct *next = vma->vm_next; might_sleep(); if (vma->vm_ops && vma->vm_ops->close) vma->vm_ops->close(vma); if (vma->vm_file) fput(vma->vm_file); mpol_put(vma_policy(vma)); vm_area_free(vma); return next; } static int do_brk_flags(unsigned long addr, unsigned long request, unsigned long flags, struct list_head *uf); SYSCALL_DEFINE1(brk, unsigned long, brk) { unsigned long retval; unsigned long newbrk, oldbrk, origbrk; struct mm_struct *mm = current->mm; struct vm_area_struct *next; unsigned long min_brk; bool populate; bool downgraded = false; LIST_HEAD(uf); if (mmap_write_lock_killable(mm)) return -EINTR; origbrk = mm->brk; #ifdef CONFIG_COMPAT_BRK /* * CONFIG_COMPAT_BRK can still be overridden by setting * randomize_va_space to 2, which will still cause mm->start_brk * to be arbitrarily shifted */ if (current->brk_randomized) min_brk = mm->start_brk; else min_brk = mm->end_data; #else min_brk = mm->start_brk; #endif if (brk < min_brk) goto out; /* * Check against rlimit here. If this check is done later after the test * of oldbrk with newbrk then it can escape the test and let the data * segment grow beyond its set limit the in case where the limit is * not page aligned -Ram Gupta */ if (check_data_rlimit(rlimit(RLIMIT_DATA), brk, mm->start_brk, mm->end_data, mm->start_data)) goto out; newbrk = PAGE_ALIGN(brk); oldbrk = PAGE_ALIGN(mm->brk); if (oldbrk == newbrk) { mm->brk = brk; goto success; } /* * Always allow shrinking brk. * __do_munmap() may downgrade mmap_lock to read. */ if (brk <= mm->brk) { int ret; /* * mm->brk must to be protected by write mmap_lock so update it * before downgrading mmap_lock. When __do_munmap() fails, * mm->brk will be restored from origbrk. */ mm->brk = brk; ret = __do_munmap(mm, newbrk, oldbrk-newbrk, &uf, true); if (ret < 0) { mm->brk = origbrk; goto out; } else if (ret == 1) { downgraded = true; } goto success; } /* Check against existing mmap mappings. */ next = find_vma(mm, oldbrk); if (next && newbrk + PAGE_SIZE > vm_start_gap(next)) goto out; /* Ok, looks good - let it rip. */ if (do_brk_flags(oldbrk, newbrk-oldbrk, 0, &uf) < 0) goto out; mm->brk = brk; success: populate = newbrk > oldbrk && (mm->def_flags & VM_LOCKED) != 0; if (downgraded) mmap_read_unlock(mm); else mmap_write_unlock(mm); userfaultfd_unmap_complete(mm, &uf); if (populate) mm_populate(oldbrk, newbrk - oldbrk); return brk; out: retval = origbrk; mmap_write_unlock(mm); return retval; } static inline unsigned long vma_compute_gap(struct vm_area_struct *vma) { unsigned long gap, prev_end; /* * Note: in the rare case of a VM_GROWSDOWN above a VM_GROWSUP, we * allow two stack_guard_gaps between them here, and when choosing * an unmapped area; whereas when expanding we only require one. * That's a little inconsistent, but keeps the code here simpler. */ gap = vm_start_gap(vma); if (vma->vm_prev) { prev_end = vm_end_gap(vma->vm_prev); if (gap > prev_end) gap -= prev_end; else gap = 0; } return gap; } #ifdef CONFIG_DEBUG_VM_RB static unsigned long vma_compute_subtree_gap(struct vm_area_struct *vma) { unsigned long max = vma_compute_gap(vma), subtree_gap; if (vma->vm_rb.rb_left) { subtree_gap = rb_entry(vma->vm_rb.rb_left, struct vm_area_struct, vm_rb)->rb_subtree_gap; if (subtree_gap > max) max = subtree_gap; } if (vma->vm_rb.rb_right) { subtree_gap = rb_entry(vma->vm_rb.rb_right, struct vm_area_struct, vm_rb)->rb_subtree_gap; if (subtree_gap > max) max = subtree_gap; } return max; } static int browse_rb(struct mm_struct *mm) { struct rb_root *root = &mm->mm_rb; int i = 0, j, bug = 0; struct rb_node *nd, *pn = NULL; unsigned long prev = 0, pend = 0; for (nd = rb_first(root); nd; nd = rb_next(nd)) { struct vm_area_struct *vma; vma = rb_entry(nd, struct vm_area_struct, vm_rb); if (vma->vm_start < prev) { pr_emerg("vm_start %lx < prev %lx\n", vma->vm_start, prev); bug = 1; } if (vma->vm_start < pend) { pr_emerg("vm_start %lx < pend %lx\n", vma->vm_start, pend); bug = 1; } if (vma->vm_start > vma->vm_end) { pr_emerg("vm_start %lx > vm_end %lx\n", vma->vm_start, vma->vm_end); bug = 1; } spin_lock(&mm->page_table_lock); if (vma->rb_subtree_gap != vma_compute_subtree_gap(vma)) { pr_emerg("free gap %lx, correct %lx\n", vma->rb_subtree_gap, vma_compute_subtree_gap(vma)); bug = 1; } spin_unlock(&mm->page_table_lock); i++; pn = nd; prev = vma->vm_start; pend = vma->vm_end; } j = 0; for (nd = pn; nd; nd = rb_prev(nd)) j++; if (i != j) { pr_emerg("backwards %d, forwards %d\n", j, i); bug = 1; } return bug ? -1 : i; } static void validate_mm_rb(struct rb_root *root, struct vm_area_struct *ignore) { struct rb_node *nd; for (nd = rb_first(root); nd; nd = rb_next(nd)) { struct vm_area_struct *vma; vma = rb_entry(nd, struct vm_area_struct, vm_rb); VM_BUG_ON_VMA(vma != ignore && vma->rb_subtree_gap != vma_compute_subtree_gap(vma), vma); } } static void validate_mm(struct mm_struct *mm) { int bug = 0; int i = 0; unsigned long highest_address = 0; struct vm_area_struct *vma = mm->mmap; while (vma) { struct anon_vma *anon_vma = vma->anon_vma; struct anon_vma_chain *avc; if (anon_vma) { anon_vma_lock_read(anon_vma); list_for_each_entry(avc, &vma->anon_vma_chain, same_vma) anon_vma_interval_tree_verify(avc); anon_vma_unlock_read(anon_vma); } highest_address = vm_end_gap(vma); vma = vma->vm_next; i++; } if (i != mm->map_count) { pr_emerg("map_count %d vm_next %d\n", mm->map_count, i); bug = 1; } if (highest_address != mm->highest_vm_end) { pr_emerg("mm->highest_vm_end %lx, found %lx\n", mm->highest_vm_end, highest_address); bug = 1; } i = browse_rb(mm); if (i != mm->map_count) { if (i != -1) pr_emerg("map_count %d rb %d\n", mm->map_count, i); bug = 1; } VM_BUG_ON_MM(bug, mm); } #else #define validate_mm_rb(root, ignore) do { } while (0) #define validate_mm(mm) do { } while (0) #endif RB_DECLARE_CALLBACKS_MAX(static, vma_gap_callbacks, struct vm_area_struct, vm_rb, unsigned long, rb_subtree_gap, vma_compute_gap) /* * Update augmented rbtree rb_subtree_gap values after vma->vm_start or * vma->vm_prev->vm_end values changed, without modifying the vma's position * in the rbtree. */ static void vma_gap_update(struct vm_area_struct *vma) { /* * As it turns out, RB_DECLARE_CALLBACKS_MAX() already created * a callback function that does exactly what we want. */ vma_gap_callbacks_propagate(&vma->vm_rb, NULL); } static inline void vma_rb_insert(struct vm_area_struct *vma, struct rb_root *root) { /* All rb_subtree_gap values must be consistent prior to insertion */ validate_mm_rb(root, NULL); rb_insert_augmented(&vma->vm_rb, root, &vma_gap_callbacks); } static void __vma_rb_erase(struct vm_area_struct *vma, struct rb_root *root) { /* * Note rb_erase_augmented is a fairly large inline function, * so make sure we instantiate it only once with our desired * augmented rbtree callbacks. */ rb_erase_augmented(&vma->vm_rb, root, &vma_gap_callbacks); } static __always_inline void vma_rb_erase_ignore(struct vm_area_struct *vma, struct rb_root *root, struct vm_area_struct *ignore) { /* * All rb_subtree_gap values must be consistent prior to erase, * with the possible exception of * * a. the "next" vma being erased if next->vm_start was reduced in * __vma_adjust() -> __vma_unlink() * b. the vma being erased in detach_vmas_to_be_unmapped() -> * vma_rb_erase() */ validate_mm_rb(root, ignore); __vma_rb_erase(vma, root); } static __always_inline void vma_rb_erase(struct vm_area_struct *vma, struct rb_root *root) { vma_rb_erase_ignore(vma, root, vma); } /* * vma has some anon_vma assigned, and is already inserted on that * anon_vma's interval trees. * * Before updating the vma's vm_start / vm_end / vm_pgoff fields, the * vma must be removed from the anon_vma's interval trees using * anon_vma_interval_tree_pre_update_vma(). * * After the update, the vma will be reinserted using * anon_vma_interval_tree_post_update_vma(). * * The entire update must be protected by exclusive mmap_lock and by * the root anon_vma's mutex. */ static inline void anon_vma_interval_tree_pre_update_vma(struct vm_area_struct *vma) { struct anon_vma_chain *avc; list_for_each_entry(avc, &vma->anon_vma_chain, same_vma) anon_vma_interval_tree_remove(avc, &avc->anon_vma->rb_root); } static inline void anon_vma_interval_tree_post_update_vma(struct vm_area_struct *vma) { struct anon_vma_chain *avc; list_for_each_entry(avc, &vma->anon_vma_chain, same_vma) anon_vma_interval_tree_insert(avc, &avc->anon_vma->rb_root); } static int find_vma_links(struct mm_struct *mm, unsigned long addr, unsigned long end, struct vm_area_struct **pprev, struct rb_node ***rb_link, struct rb_node **rb_parent) { struct rb_node **__rb_link, *__rb_parent, *rb_prev; __rb_link = &mm->mm_rb.rb_node; rb_prev = __rb_parent = NULL; while (*__rb_link) { struct vm_area_struct *vma_tmp; __rb_parent = *__rb_link; vma_tmp = rb_entry(__rb_parent, struct vm_area_struct, vm_rb); if (vma_tmp->vm_end > addr) { /* Fail if an existing vma overlaps the area */ if (vma_tmp->vm_start < end) return -ENOMEM; __rb_link = &__rb_parent->rb_left; } else { rb_prev = __rb_parent; __rb_link = &__rb_parent->rb_right; } } *pprev = NULL; if (rb_prev) *pprev = rb_entry(rb_prev, struct vm_area_struct, vm_rb); *rb_link = __rb_link; *rb_parent = __rb_parent; return 0; } /* * vma_next() - Get the next VMA. * @mm: The mm_struct. * @vma: The current vma. * * If @vma is NULL, return the first vma in the mm. * * Returns: The next VMA after @vma. */ static inline struct vm_area_struct *vma_next(struct mm_struct *mm, struct vm_area_struct *vma) { if (!vma) return mm->mmap; return vma->vm_next; } /* * munmap_vma_range() - munmap VMAs that overlap a range. * @mm: The mm struct * @start: The start of the range. * @len: The length of the range. * @pprev: pointer to the pointer that will be set to previous vm_area_struct * @rb_link: the rb_node * @rb_parent: the parent rb_node * * Find all the vm_area_struct that overlap from @start to * @end and munmap them. Set @pprev to the previous vm_area_struct. * * Returns: -ENOMEM on munmap failure or 0 on success. */ static inline int munmap_vma_range(struct mm_struct *mm, unsigned long start, unsigned long len, struct vm_area_struct **pprev, struct rb_node ***link, struct rb_node **parent, struct list_head *uf) { while (find_vma_links(mm, start, start + len, pprev, link, parent)) if (do_munmap(mm, start, len, uf)) return -ENOMEM; return 0; } static unsigned long count_vma_pages_range(struct mm_struct *mm, unsigned long addr, unsigned long end) { unsigned long nr_pages = 0; struct vm_area_struct *vma; /* Find first overlaping mapping */ vma = find_vma_intersection(mm, addr, end); if (!vma) return 0; nr_pages = (min(end, vma->vm_end) - max(addr, vma->vm_start)) >> PAGE_SHIFT; /* Iterate over the rest of the overlaps */ for (vma = vma->vm_next; vma; vma = vma->vm_next) { unsigned long overlap_len; if (vma->vm_start > end) break; overlap_len = min(end, vma->vm_end) - vma->vm_start; nr_pages += overlap_len >> PAGE_SHIFT; } return nr_pages; } void __vma_link_rb(struct mm_struct *mm, struct vm_area_struct *vma, struct rb_node **rb_link, struct rb_node *rb_parent) { /* Update tracking information for the gap following the new vma. */ if (vma->vm_next) vma_gap_update(vma->vm_next); else mm->highest_vm_end = vm_end_gap(vma); /* * vma->vm_prev wasn't known when we followed the rbtree to find the * correct insertion point for that vma. As a result, we could not * update the vma vm_rb parents rb_subtree_gap values on the way down. * So, we first insert the vma with a zero rb_subtree_gap value * (to be consistent with what we did on the way down), and then * immediately update the gap to the correct value. Finally we * rebalance the rbtree after all augmented values have been set. */ rb_link_node(&vma->vm_rb, rb_parent, rb_link); vma->rb_subtree_gap = 0; vma_gap_update(vma); vma_rb_insert(vma, &mm->mm_rb); } static void __vma_link_file(struct vm_area_struct *vma) { struct file *file; file = vma->vm_file; if (file) { struct address_space *mapping = file->f_mapping; if (vma->vm_flags & VM_DENYWRITE) put_write_access(file_inode(file)); if (vma->vm_flags & VM_SHARED) mapping_allow_writable(mapping); flush_dcache_mmap_lock(mapping); vma_interval_tree_insert(vma, &mapping->i_mmap); flush_dcache_mmap_unlock(mapping); } } static void __vma_link(struct mm_struct *mm, struct vm_area_struct *vma, struct vm_area_struct *prev, struct rb_node **rb_link, struct rb_node *rb_parent) { __vma_link_list(mm, vma, prev); __vma_link_rb(mm, vma, rb_link, rb_parent); } static void vma_link(struct mm_struct *mm, struct vm_area_struct *vma, struct vm_area_struct *prev, struct rb_node **rb_link, struct rb_node *rb_parent) { struct address_space *mapping = NULL; if (vma->vm_file) { mapping = vma->vm_file->f_mapping; i_mmap_lock_write(mapping); } __vma_link(mm, vma, prev, rb_link, rb_parent); __vma_link_file(vma); if (mapping) i_mmap_unlock_write(mapping); mm->map_count++; validate_mm(mm); } /* * Helper for vma_adjust() in the split_vma insert case: insert a vma into the * mm's list and rbtree. It has already been inserted into the interval tree. */ static void __insert_vm_struct(struct mm_struct *mm, struct vm_area_struct *vma) { struct vm_area_struct *prev; struct rb_node **rb_link, *rb_parent; if (find_vma_links(mm, vma->vm_start, vma->vm_end, &prev, &rb_link, &rb_parent)) BUG(); __vma_link(mm, vma, prev, rb_link, rb_parent); mm->map_count++; } static __always_inline void __vma_unlink(struct mm_struct *mm, struct vm_area_struct *vma, struct vm_area_struct *ignore) { vma_rb_erase_ignore(vma, &mm->mm_rb, ignore); __vma_unlink_list(mm, vma); /* Kill the cache */ vmacache_invalidate(mm); } /* * We cannot adjust vm_start, vm_end, vm_pgoff fields of a vma that * is already present in an i_mmap tree without adjusting the tree. * The following helper function should be used when such adjustments * are necessary. The "insert" vma (if any) is to be inserted * before we drop the necessary locks. */ int __vma_adjust(struct vm_area_struct *vma, unsigned long start, unsigned long end, pgoff_t pgoff, struct vm_area_struct *insert, struct vm_area_struct *expand) { struct mm_struct *mm = vma->vm_mm; struct vm_area_struct *next = vma->vm_next, *orig_vma = vma; struct address_space *mapping = NULL; struct rb_root_cached *root = NULL; struct anon_vma *anon_vma = NULL; struct file *file = vma->vm_file; bool start_changed = false, end_changed = false; long adjust_next = 0; int remove_next = 0; if (next && !insert) { struct vm_area_struct *exporter = NULL, *importer = NULL; if (end >= next->vm_end) { /* * vma expands, overlapping all the next, and * perhaps the one after too (mprotect case 6). * The only other cases that gets here are * case 1, case 7 and case 8. */ if (next == expand) { /* * The only case where we don't expand "vma" * and we expand "next" instead is case 8. */ VM_WARN_ON(end != next->vm_end); /* * remove_next == 3 means we're * removing "vma" and that to do so we * swapped "vma" and "next". */ remove_next = 3; VM_WARN_ON(file != next->vm_file); swap(vma, next); } else { VM_WARN_ON(expand != vma); /* * case 1, 6, 7, remove_next == 2 is case 6, * remove_next == 1 is case 1 or 7. */ remove_next = 1 + (end > next->vm_end); VM_WARN_ON(remove_next == 2 && end != next->vm_next->vm_end); /* trim end to next, for case 6 first pass */ end = next->vm_end; } exporter = next; importer = vma; /* * If next doesn't have anon_vma, import from vma after * next, if the vma overlaps with it. */ if (remove_next == 2 && !next->anon_vma) exporter = next->vm_next; } else if (end > next->vm_start) { /* * vma expands, overlapping part of the next: * mprotect case 5 shifting the boundary up. */ adjust_next = (end - next->vm_start); exporter = next; importer = vma; VM_WARN_ON(expand != importer); } else if (end < vma->vm_end) { /* * vma shrinks, and !insert tells it's not * split_vma inserting another: so it must be * mprotect case 4 shifting the boundary down. */ adjust_next = -(vma->vm_end - end); exporter = vma; importer = next; VM_WARN_ON(expand != importer); } /* * Easily overlooked: when mprotect shifts the boundary, * make sure the expanding vma has anon_vma set if the * shrinking vma had, to cover any anon pages imported. */ if (exporter && exporter->anon_vma && !importer->anon_vma) { int error; importer->anon_vma = exporter->anon_vma; error = anon_vma_clone(importer, exporter); if (error) return error; } } again: vma_adjust_trans_huge(orig_vma, start, end, adjust_next); if (file) { mapping = file->f_mapping; root = &mapping->i_mmap; uprobe_munmap(vma, vma->vm_start, vma->vm_end); if (adjust_next) uprobe_munmap(next, next->vm_start, next->vm_end); i_mmap_lock_write(mapping); if (insert) { /* * Put into interval tree now, so instantiated pages * are visible to arm/parisc __flush_dcache_page * throughout; but we cannot insert into address * space until vma start or end is updated. */ __vma_link_file(insert); } } anon_vma = vma->anon_vma; if (!anon_vma && adjust_next) anon_vma = next->anon_vma; if (anon_vma) { VM_WARN_ON(adjust_next && next->anon_vma && anon_vma != next->anon_vma); anon_vma_lock_write(anon_vma); anon_vma_interval_tree_pre_update_vma(vma); if (adjust_next) anon_vma_interval_tree_pre_update_vma(next); } if (file) { flush_dcache_mmap_lock(mapping); vma_interval_tree_remove(vma, root); if (adjust_next) vma_interval_tree_remove(next, root); } if (start != vma->vm_start) { vma->vm_start = start; start_changed = true; } if (end != vma->vm_end) { vma->vm_end = end; end_changed = true; } vma->vm_pgoff = pgoff; if (adjust_next) { next->vm_start += adjust_next; next->vm_pgoff += adjust_next >> PAGE_SHIFT; } if (file) { if (adjust_next) vma_interval_tree_insert(next, root); vma_interval_tree_insert(vma, root); flush_dcache_mmap_unlock(mapping); } if (remove_next) { /* * vma_merge has merged next into vma, and needs * us to remove next before dropping the locks. */ if (remove_next != 3) __vma_unlink(mm, next, next); else /* * vma is not before next if they've been * swapped. * * pre-swap() next->vm_start was reduced so * tell validate_mm_rb to ignore pre-swap() * "next" (which is stored in post-swap() * "vma"). */ __vma_unlink(mm, next, vma); if (file) __remove_shared_vm_struct(next, file, mapping); } else if (insert) { /* * split_vma has split insert from vma, and needs * us to insert it before dropping the locks * (it may either follow vma or precede it). */ __insert_vm_struct(mm, insert); } else { if (start_changed) vma_gap_update(vma); if (end_changed) { if (!next) mm->highest_vm_end = vm_end_gap(vma); else if (!adjust_next) vma_gap_update(next); } } if (anon_vma) { anon_vma_interval_tree_post_update_vma(vma); if (adjust_next) anon_vma_interval_tree_post_update_vma(next); anon_vma_unlock_write(anon_vma); } if (file) { i_mmap_unlock_write(mapping); uprobe_mmap(vma); if (adjust_next) uprobe_mmap(next); } if (remove_next) { if (file) { uprobe_munmap(next, next->vm_start, next->vm_end); fput(file); } if (next->anon_vma) anon_vma_merge(vma, next); mm->map_count--; mpol_put(vma_policy(next)); vm_area_free(next); /* * In mprotect's case 6 (see comments on vma_merge), * we must remove another next too. It would clutter * up the code too much to do both in one go. */ if (remove_next != 3) { /* * If "next" was removed and vma->vm_end was * expanded (up) over it, in turn * "next->vm_prev->vm_end" changed and the * "vma->vm_next" gap must be updated. */ next = vma->vm_next; } else { /* * For the scope of the comment "next" and * "vma" considered pre-swap(): if "vma" was * removed, next->vm_start was expanded (down) * over it and the "next" gap must be updated. * Because of the swap() the post-swap() "vma" * actually points to pre-swap() "next" * (post-swap() "next" as opposed is now a * dangling pointer). */ next = vma; } if (remove_next == 2) { remove_next = 1; end = next->vm_end; goto again; } else if (next) vma_gap_update(next); else { /* * If remove_next == 2 we obviously can't * reach this path. * * If remove_next == 3 we can't reach this * path because pre-swap() next is always not * NULL. pre-swap() "next" is not being * removed and its next->vm_end is not altered * (and furthermore "end" already matches * next->vm_end in remove_next == 3). * * We reach this only in the remove_next == 1 * case if the "next" vma that was removed was * the highest vma of the mm. However in such * case next->vm_end == "end" and the extended * "vma" has vma->vm_end == next->vm_end so * mm->highest_vm_end doesn't need any update * in remove_next == 1 case. */ VM_WARN_ON(mm->highest_vm_end != vm_end_gap(vma)); } } if (insert && file) uprobe_mmap(insert); validate_mm(mm); return 0; } /* * If the vma has a ->close operation then the driver probably needs to release * per-vma resources, so we don't attempt to merge those. */ static inline int is_mergeable_vma(struct vm_area_struct *vma, struct file *file, unsigned long vm_flags, struct vm_userfaultfd_ctx vm_userfaultfd_ctx) { /* * VM_SOFTDIRTY should not prevent from VMA merging, if we * match the flags but dirty bit -- the caller should mark * merged VMA as dirty. If dirty bit won't be excluded from * comparison, we increase pressure on the memory system forcing * the kernel to generate new VMAs when old one could be * extended instead. */ if ((vma->vm_flags ^ vm_flags) & ~VM_SOFTDIRTY) return 0; if (vma->vm_file != file) return 0; if (vma->vm_ops && vma->vm_ops->close) return 0; if (!is_mergeable_vm_userfaultfd_ctx(vma, vm_userfaultfd_ctx)) return 0; return 1; } static inline int is_mergeable_anon_vma(struct anon_vma *anon_vma1, struct anon_vma *anon_vma2, struct vm_area_struct *vma) { /* * The list_is_singular() test is to avoid merging VMA cloned from * parents. This can improve scalability caused by anon_vma lock. */ if ((!anon_vma1 || !anon_vma2) && (!vma || list_is_singular(&vma->anon_vma_chain))) return 1; return anon_vma1 == anon_vma2; } /* * Return true if we can merge this (vm_flags,anon_vma,file,vm_pgoff) * in front of (at a lower virtual address and file offset than) the vma. * * We cannot merge two vmas if they have differently assigned (non-NULL) * anon_vmas, nor if same anon_vma is assigned but offsets incompatible. * * We don't check here for the merged mmap wrapping around the end of pagecache * indices (16TB on ia32) because do_mmap() does not permit mmap's which * wrap, nor mmaps which cover the final page at index -1UL. */ static int can_vma_merge_before(struct vm_area_struct *vma, unsigned long vm_flags, struct anon_vma *anon_vma, struct file *file, pgoff_t vm_pgoff, struct vm_userfaultfd_ctx vm_userfaultfd_ctx) { if (is_mergeable_vma(vma, file, vm_flags, vm_userfaultfd_ctx) && is_mergeable_anon_vma(anon_vma, vma->anon_vma, vma)) { if (vma->vm_pgoff == vm_pgoff) return 1; } return 0; } /* * Return true if we can merge this (vm_flags,anon_vma,file,vm_pgoff) * beyond (at a higher virtual address and file offset than) the vma. * * We cannot merge two vmas if they have differently assigned (non-NULL) * anon_vmas, nor if same anon_vma is assigned but offsets incompatible. */ static int can_vma_merge_after(struct vm_area_struct *vma, unsigned long vm_flags, struct anon_vma *anon_vma, struct file *file, pgoff_t vm_pgoff, struct vm_userfaultfd_ctx vm_userfaultfd_ctx) { if (is_mergeable_vma(vma, file, vm_flags, vm_userfaultfd_ctx) && is_mergeable_anon_vma(anon_vma, vma->anon_vma, vma)) { pgoff_t vm_pglen; vm_pglen = vma_pages(vma); if (vma->vm_pgoff + vm_pglen == vm_pgoff) return 1; } return 0; } /* * Given a mapping request (addr,end,vm_flags,file,pgoff), figure out * whether that can be merged with its predecessor or its successor. * Or both (it neatly fills a hole). * * In most cases - when called for mmap, brk or mremap - [addr,end) is * certain not to be mapped by the time vma_merge is called; but when * called for mprotect, it is certain to be already mapped (either at * an offset within prev, or at the start of next), and the flags of * this area are about to be changed to vm_flags - and the no-change * case has already been eliminated. * * The following mprotect cases have to be considered, where AAAA is * the area passed down from mprotect_fixup, never extending beyond one * vma, PPPPPP is the prev vma specified, and NNNNNN the next vma after: * * AAAA AAAA AAAA * PPPPPPNNNNNN PPPPPPNNNNNN PPPPPPNNNNNN * cannot merge might become might become * PPNNNNNNNNNN PPPPPPPPPPNN * mmap, brk or case 4 below case 5 below * mremap move: * AAAA AAAA * PPPP NNNN PPPPNNNNXXXX * might become might become * PPPPPPPPPPPP 1 or PPPPPPPPPPPP 6 or * PPPPPPPPNNNN 2 or PPPPPPPPXXXX 7 or * PPPPNNNNNNNN 3 PPPPXXXXXXXX 8 * * It is important for case 8 that the vma NNNN overlapping the * region AAAA is never going to extended over XXXX. Instead XXXX must * be extended in region AAAA and NNNN must be removed. This way in * all cases where vma_merge succeeds, the moment vma_adjust drops the * rmap_locks, the properties of the merged vma will be already * correct for the whole merged range. Some of those properties like * vm_page_prot/vm_flags may be accessed by rmap_walks and they must * be correct for the whole merged range immediately after the * rmap_locks are released. Otherwise if XXXX would be removed and * NNNN would be extended over the XXXX range, remove_migration_ptes * or other rmap walkers (if working on addresses beyond the "end" * parameter) may establish ptes with the wrong permissions of NNNN * instead of the right permissions of XXXX. */ struct vm_area_struct *vma_merge(struct mm_struct *mm, struct vm_area_struct *prev, unsigned long addr, unsigned long end, unsigned long vm_flags, struct anon_vma *anon_vma, struct file *file, pgoff_t pgoff, struct mempolicy *policy, struct vm_userfaultfd_ctx vm_userfaultfd_ctx) { pgoff_t pglen = (end - addr) >> PAGE_SHIFT; struct vm_area_struct *area, *next; int err; /* * We later require that vma->vm_flags == vm_flags, * so this tests vma->vm_flags & VM_SPECIAL, too. */ if (vm_flags & VM_SPECIAL) return NULL; next = vma_next(mm, prev); area = next; if (area && area->vm_end == end) /* cases 6, 7, 8 */ next = next->vm_next; /* verify some invariant that must be enforced by the caller */ VM_WARN_ON(prev && addr <= prev->vm_start); VM_WARN_ON(area && end > area->vm_end); VM_WARN_ON(addr >= end); /* * Can it merge with the predecessor? */ if (prev && prev->vm_end == addr && mpol_equal(vma_policy(prev), policy) && can_vma_merge_after(prev, vm_flags, anon_vma, file, pgoff, vm_userfaultfd_ctx)) { /* * OK, it can. Can we now merge in the successor as well? */ if (next && end == next->vm_start && mpol_equal(policy, vma_policy(next)) && can_vma_merge_before(next, vm_flags, anon_vma, file, pgoff+pglen, vm_userfaultfd_ctx) && is_mergeable_anon_vma(prev->anon_vma, next->anon_vma, NULL)) { /* cases 1, 6 */ err = __vma_adjust(prev, prev->vm_start, next->vm_end, prev->vm_pgoff, NULL, prev); } else /* cases 2, 5, 7 */ err = __vma_adjust(prev, prev->vm_start, end, prev->vm_pgoff, NULL, prev); if (err) return NULL; khugepaged_enter_vma_merge(prev, vm_flags); return prev; } /* * Can this new request be merged in front of next? */ if (next && end == next->vm_start && mpol_equal(policy, vma_policy(next)) && can_vma_merge_before(next, vm_flags, anon_vma, file, pgoff+pglen, vm_userfaultfd_ctx)) { if (prev && addr < prev->vm_end) /* case 4 */ err = __vma_adjust(prev, prev->vm_start, addr, prev->vm_pgoff, NULL, next); else { /* cases 3, 8 */ err = __vma_adjust(area, addr, next->vm_end, next->vm_pgoff - pglen, NULL, next); /* * In case 3 area is already equal to next and * this is a noop, but in case 8 "area" has * been removed and next was expanded over it. */ area = next; } if (err) return NULL; khugepaged_enter_vma_merge(area, vm_flags); return area; } return NULL; } /* * Rough compatibility check to quickly see if it's even worth looking * at sharing an anon_vma. * * They need to have the same vm_file, and the flags can only differ * in things that mprotect may change. * * NOTE! The fact that we share an anon_vma doesn't _have_ to mean that * we can merge the two vma's. For example, we refuse to merge a vma if * there is a vm_ops->close() function, because that indicates that the * driver is doing some kind of reference counting. But that doesn't * really matter for the anon_vma sharing case. */ static int anon_vma_compatible(struct vm_area_struct *a, struct vm_area_struct *b) { return a->vm_end == b->vm_start && mpol_equal(vma_policy(a), vma_policy(b)) && a->vm_file == b->vm_file && !((a->vm_flags ^ b->vm_flags) & ~(VM_ACCESS_FLAGS | VM_SOFTDIRTY)) && b->vm_pgoff == a->vm_pgoff + ((b->vm_start - a->vm_start) >> PAGE_SHIFT); } /* * Do some basic sanity checking to see if we can re-use the anon_vma * from 'old'. The 'a'/'b' vma's are in VM order - one of them will be * the same as 'old', the other will be the new one that is trying * to share the anon_vma. * * NOTE! This runs with mm_sem held for reading, so it is possible that * the anon_vma of 'old' is concurrently in the process of being set up * by another page fault trying to merge _that_. But that's ok: if it * is being set up, that automatically means that it will be a singleton * acceptable for merging, so we can do all of this optimistically. But * we do that READ_ONCE() to make sure that we never re-load the pointer. * * IOW: that the "list_is_singular()" test on the anon_vma_chain only * matters for the 'stable anon_vma' case (ie the thing we want to avoid * is to return an anon_vma that is "complex" due to having gone through * a fork). * * We also make sure that the two vma's are compatible (adjacent, * and with the same memory policies). That's all stable, even with just * a read lock on the mm_sem. */ static struct anon_vma *reusable_anon_vma(struct vm_area_struct *old, struct vm_area_struct *a, struct vm_area_struct *b) { if (anon_vma_compatible(a, b)) { struct anon_vma *anon_vma = READ_ONCE(old->anon_vma); if (anon_vma && list_is_singular(&old->anon_vma_chain)) return anon_vma; } return NULL; } /* * find_mergeable_anon_vma is used by anon_vma_prepare, to check * neighbouring vmas for a suitable anon_vma, before it goes off * to allocate a new anon_vma. It checks because a repetitive * sequence of mprotects and faults may otherwise lead to distinct * anon_vmas being allocated, preventing vma merge in subsequent * mprotect. */ struct anon_vma *find_mergeable_anon_vma(struct vm_area_struct *vma) { struct anon_vma *anon_vma = NULL; /* Try next first. */ if (vma->vm_next) { anon_vma = reusable_anon_vma(vma->vm_next, vma, vma->vm_next); if (anon_vma) return anon_vma; } /* Try prev next. */ if (vma->vm_prev) anon_vma = reusable_anon_vma(vma->vm_prev, vma->vm_prev, vma); /* * We might reach here with anon_vma == NULL if we can't find * any reusable anon_vma. * There's no absolute need to look only at touching neighbours: * we could search further afield for "compatible" anon_vmas. * But it would probably just be a waste of time searching, * or lead to too many vmas hanging off the same anon_vma. * We're trying to allow mprotect remerging later on, * not trying to minimize memory used for anon_vmas. */ return anon_vma; } /* * If a hint addr is less than mmap_min_addr change hint to be as * low as possible but still greater than mmap_min_addr */ static inline unsigned long round_hint_to_min(unsigned long hint) { hint &= PAGE_MASK; if (((void *)hint != NULL) && (hint < mmap_min_addr)) return PAGE_ALIGN(mmap_min_addr); return hint; } static inline int mlock_future_check(struct mm_struct *mm, unsigned long flags, unsigned long len) { unsigned long locked, lock_limit; /* mlock MCL_FUTURE? */ if (flags & VM_LOCKED) { locked = len >> PAGE_SHIFT; locked += mm->locked_vm; lock_limit = rlimit(RLIMIT_MEMLOCK); lock_limit >>= PAGE_SHIFT; if (locked > lock_limit && !capable(CAP_IPC_LOCK)) return -EAGAIN; } return 0; } static inline u64 file_mmap_size_max(struct file *file, struct inode *inode) { if (S_ISREG(inode->i_mode)) return MAX_LFS_FILESIZE; if (S_ISBLK(inode->i_mode)) return MAX_LFS_FILESIZE; if (S_ISSOCK(inode->i_mode)) return MAX_LFS_FILESIZE; /* Special "we do even unsigned file positions" case */ if (file->f_mode & FMODE_UNSIGNED_OFFSET) return 0; /* Yes, random drivers might want more. But I'm tired of buggy drivers */ return ULONG_MAX; } static inline bool file_mmap_ok(struct file *file, struct inode *inode, unsigned long pgoff, unsigned long len) { u64 maxsize = file_mmap_size_max(file, inode); if (maxsize && len > maxsize) return false; maxsize -= len; if (pgoff > maxsize >> PAGE_SHIFT) return false; return true; } /* * The caller must write-lock current->mm->mmap_lock. */ unsigned long do_mmap(struct file *file, unsigned long addr, unsigned long len, unsigned long prot, unsigned long flags, unsigned long pgoff, unsigned long *populate, struct list_head *uf) { struct mm_struct *mm = current->mm; vm_flags_t vm_flags; int pkey = 0; *populate = 0; if (!len) return -EINVAL; /* * Does the application expect PROT_READ to imply PROT_EXEC? * * (the exception is when the underlying filesystem is noexec * mounted, in which case we dont add PROT_EXEC.) */ if ((prot & PROT_READ) && (current->personality & READ_IMPLIES_EXEC)) if (!(file && path_noexec(&file->f_path))) prot |= PROT_EXEC; /* force arch specific MAP_FIXED handling in get_unmapped_area */ if (flags & MAP_FIXED_NOREPLACE) flags |= MAP_FIXED; if (!(flags & MAP_FIXED)) addr = round_hint_to_min(addr); /* Careful about overflows.. */ len = PAGE_ALIGN(len); if (!len) return -ENOMEM; /* offset overflow? */ if ((pgoff + (len >> PAGE_SHIFT)) < pgoff) return -EOVERFLOW; /* Too many mappings? */ if (mm->map_count > sysctl_max_map_count) return -ENOMEM; /* Obtain the address to map to. we verify (or select) it and ensure * that it represents a valid section of the address space. */ addr = get_unmapped_area(file, addr, len, pgoff, flags); if (IS_ERR_VALUE(addr)) return addr; if (flags & MAP_FIXED_NOREPLACE) { struct vm_area_struct *vma = find_vma(mm, addr); if (vma && vma->vm_start < addr + len) return -EEXIST; } if (prot == PROT_EXEC) { pkey = execute_only_pkey(mm); if (pkey < 0) pkey = 0; } /* Do simple checking here so the lower-level routines won't have * to. we assume access permissions have been handled by the open * of the memory object, so we don't do any here. */ vm_flags = calc_vm_prot_bits(prot, pkey) | calc_vm_flag_bits(flags) | mm->def_flags | VM_MAYREAD | VM_MAYWRITE | VM_MAYEXEC; if (flags & MAP_LOCKED) if (!can_do_mlock()) return -EPERM; if (mlock_future_check(mm, vm_flags, len)) return -EAGAIN; if (file) { struct inode *inode = file_inode(file); unsigned long flags_mask; if (!file_mmap_ok(file, inode, pgoff, len)) return -EOVERFLOW; flags_mask = LEGACY_MAP_MASK | file->f_op->mmap_supported_flags; switch (flags & MAP_TYPE) { case MAP_SHARED: /* * Force use of MAP_SHARED_VALIDATE with non-legacy * flags. E.g. MAP_SYNC is dangerous to use with * MAP_SHARED as you don't know which consistency model * you will get. We silently ignore unsupported flags * with MAP_SHARED to preserve backward compatibility. */ flags &= LEGACY_MAP_MASK; fallthrough; case MAP_SHARED_VALIDATE: if (flags & ~flags_mask) return -EOPNOTSUPP; if (prot & PROT_WRITE) { if (!(file->f_mode & FMODE_WRITE)) return -EACCES; if (IS_SWAPFILE(file->f_mapping->host)) return -ETXTBSY; } /* * Make sure we don't allow writing to an append-only * file.. */ if (IS_APPEND(inode) && (file->f_mode & FMODE_WRITE)) return -EACCES; /* * Make sure there are no mandatory locks on the file. */ if (locks_verify_locked(file)) return -EAGAIN; vm_flags |= VM_SHARED | VM_MAYSHARE; if (!(file->f_mode & FMODE_WRITE)) vm_flags &= ~(VM_MAYWRITE | VM_SHARED); fallthrough; case MAP_PRIVATE: if (!(file->f_mode & FMODE_READ)) return -EACCES; if (path_noexec(&file->f_path)) { if (vm_flags & VM_EXEC) return -EPERM; vm_flags &= ~VM_MAYEXEC; } if (!file->f_op->mmap) return -ENODEV; if (vm_flags & (VM_GROWSDOWN|VM_GROWSUP)) return -EINVAL; break; default: return -EINVAL; } } else { switch (flags & MAP_TYPE) { case MAP_SHARED: if (vm_flags & (VM_GROWSDOWN|VM_GROWSUP)) return -EINVAL; /* * Ignore pgoff. */ pgoff = 0; vm_flags |= VM_SHARED | VM_MAYSHARE; break; case MAP_PRIVATE: /* * Set pgoff according to addr for anon_vma. */ pgoff = addr >> PAGE_SHIFT; break; default: return -EINVAL; } } /* * Set 'VM_NORESERVE' if we should not account for the * memory use of this mapping. */ if (flags & MAP_NORESERVE) { /* We honor MAP_NORESERVE if allowed to overcommit */ if (sysctl_overcommit_memory != OVERCOMMIT_NEVER) vm_flags |= VM_NORESERVE; /* hugetlb applies strict overcommit unless MAP_NORESERVE */ if (file && is_file_hugepages(file)) vm_flags |= VM_NORESERVE; } addr = mmap_region(file, addr, len, vm_flags, pgoff, uf); if (!IS_ERR_VALUE(addr) && ((vm_flags & VM_LOCKED) || (flags & (MAP_POPULATE | MAP_NONBLOCK)) == MAP_POPULATE)) *populate = len; return addr; } unsigned long ksys_mmap_pgoff(unsigned long addr, unsigned long len, unsigned long prot, unsigned long flags, unsigned long fd, unsigned long pgoff) { struct file *file = NULL; unsigned long retval; if (!(flags & MAP_ANONYMOUS)) { audit_mmap_fd(fd, flags); file = fget(fd); if (!file) return -EBADF; if (is_file_hugepages(file)) { len = ALIGN(len, huge_page_size(hstate_file(file))); } else if (unlikely(flags & MAP_HUGETLB)) { retval = -EINVAL; goto out_fput; } } else if (flags & MAP_HUGETLB) { struct user_struct *user = NULL; struct hstate *hs; hs = hstate_sizelog((flags >> MAP_HUGE_SHIFT) & MAP_HUGE_MASK); if (!hs) return -EINVAL; len = ALIGN(len, huge_page_size(hs)); /* * VM_NORESERVE is used because the reservations will be * taken when vm_ops->mmap() is called * A dummy user value is used because we are not locking * memory so no accounting is necessary */ file = hugetlb_file_setup(HUGETLB_ANON_FILE, len, VM_NORESERVE, &user, HUGETLB_ANONHUGE_INODE, (flags >> MAP_HUGE_SHIFT) & MAP_HUGE_MASK); if (IS_ERR(file)) return PTR_ERR(file); } flags &= ~(MAP_EXECUTABLE | MAP_DENYWRITE); retval = vm_mmap_pgoff(file, addr, len, prot, flags, pgoff); out_fput: if (file) fput(file); return retval; } SYSCALL_DEFINE6(mmap_pgoff, unsigned long, addr, unsigned long, len, unsigned long, prot, unsigned long, flags, unsigned long, fd, unsigned long, pgoff) { return ksys_mmap_pgoff(addr, len, prot, flags, fd, pgoff); } #ifdef __ARCH_WANT_SYS_OLD_MMAP struct mmap_arg_struct { unsigned long addr; unsigned long len; unsigned long prot; unsigned long flags; unsigned long fd; unsigned long offset; }; SYSCALL_DEFINE1(old_mmap, struct mmap_arg_struct __user *, arg) { struct mmap_arg_struct a; if (copy_from_user(&a, arg, sizeof(a))) return -EFAULT; if (offset_in_page(a.offset)) return -EINVAL; return ksys_mmap_pgoff(a.addr, a.len, a.prot, a.flags, a.fd, a.offset >> PAGE_SHIFT); } #endif /* __ARCH_WANT_SYS_OLD_MMAP */ /* * Some shared mappings will want the pages marked read-only * to track write events. If so, we'll downgrade vm_page_prot * to the private version (using protection_map[] without the * VM_SHARED bit). */ int vma_wants_writenotify(struct vm_area_struct *vma, pgprot_t vm_page_prot) { vm_flags_t vm_flags = vma->vm_flags; const struct vm_operations_struct *vm_ops = vma->vm_ops; /* If it was private or non-writable, the write bit is already clear */ if ((vm_flags & (VM_WRITE|VM_SHARED)) != ((VM_WRITE|VM_SHARED))) return 0; /* The backer wishes to know when pages are first written to? */ if (vm_ops && (vm_ops->page_mkwrite || vm_ops->pfn_mkwrite)) return 1; /* The open routine did something to the protections that pgprot_modify * won't preserve? */ if (pgprot_val(vm_page_prot) != pgprot_val(vm_pgprot_modify(vm_page_prot, vm_flags))) return 0; /* Do we need to track softdirty? */ if (IS_ENABLED(CONFIG_MEM_SOFT_DIRTY) && !(vm_flags & VM_SOFTDIRTY)) return 1; /* Specialty mapping? */ if (vm_flags & VM_PFNMAP) return 0; /* Can the mapping track the dirty pages? */ return vma->vm_file && vma->vm_file->f_mapping && mapping_can_writeback(vma->vm_file->f_mapping); } /* * We account for memory if it's a private writeable mapping, * not hugepages and VM_NORESERVE wasn't set. */ static inline int accountable_mapping(struct file *file, vm_flags_t vm_flags) { /* * hugetlb has its own accounting separate from the core VM * VM_HUGETLB may not be set yet so we cannot check for that flag. */ if (file && is_file_hugepages(file)) return 0; return (vm_flags & (VM_NORESERVE | VM_SHARED | VM_WRITE)) == VM_WRITE; } unsigned long mmap_region(struct file *file, unsigned long addr, unsigned long len, vm_flags_t vm_flags, unsigned long pgoff, struct list_head *uf) { struct mm_struct *mm = current->mm; struct vm_area_struct *vma, *prev, *merge; int error; struct rb_node **rb_link, *rb_parent; unsigned long charged = 0; /* Check against address space limit. */ if (!may_expand_vm(mm, vm_flags, len >> PAGE_SHIFT)) { unsigned long nr_pages; /* * MAP_FIXED may remove pages of mappings that intersects with * requested mapping. Account for the pages it would unmap. */ nr_pages = count_vma_pages_range(mm, addr, addr + len); if (!may_expand_vm(mm, vm_flags, (len >> PAGE_SHIFT) - nr_pages)) return -ENOMEM; } /* Clear old maps, set up prev, rb_link, rb_parent, and uf */ if (munmap_vma_range(mm, addr, len, &prev, &rb_link, &rb_parent, uf)) return -ENOMEM; /* * Private writable mapping: check memory availability */ if (accountable_mapping(file, vm_flags)) { charged = len >> PAGE_SHIFT; if (security_vm_enough_memory_mm(mm, charged)) return -ENOMEM; vm_flags |= VM_ACCOUNT; } /* * Can we just expand an old mapping? */ vma = vma_merge(mm, prev, addr, addr + len, vm_flags, NULL, file, pgoff, NULL, NULL_VM_UFFD_CTX); if (vma) goto out; /* * Determine the object being mapped and call the appropriate * specific mapper. the address has already been validated, but * not unmapped, but the maps are removed from the list. */ vma = vm_area_alloc(mm); if (!vma) { error = -ENOMEM; goto unacct_error; } vma->vm_start = addr; vma->vm_end = addr + len; vma->vm_flags = vm_flags; vma->vm_page_prot = vm_get_page_prot(vm_flags); vma->vm_pgoff = pgoff; if (file) { if (vm_flags & VM_DENYWRITE) { error = deny_write_access(file); if (error) goto free_vma; } if (vm_flags & VM_SHARED) { error = mapping_map_writable(file->f_mapping); if (error) goto allow_write_and_free_vma; } /* ->mmap() can change vma->vm_file, but must guarantee that * vma_link() below can deny write-access if VM_DENYWRITE is set * and map writably if VM_SHARED is set. This usually means the * new file must not have been exposed to user-space, yet. */ vma->vm_file = get_file(file); error = call_mmap(file, vma); if (error) goto unmap_and_free_vma; /* Can addr have changed?? * * Answer: Yes, several device drivers can do it in their * f_op->mmap method. -DaveM * Bug: If addr is changed, prev, rb_link, rb_parent should * be updated for vma_link() */ WARN_ON_ONCE(addr != vma->vm_start); addr = vma->vm_start; /* If vm_flags changed after call_mmap(), we should try merge vma again * as we may succeed this time. */ if (unlikely(vm_flags != vma->vm_flags && prev)) { merge = vma_merge(mm, prev, vma->vm_start, vma->vm_end, vma->vm_flags, NULL, vma->vm_file, vma->vm_pgoff, NULL, NULL_VM_UFFD_CTX); if (merge) { /* ->mmap() can change vma->vm_file and fput the original file. So * fput the vma->vm_file here or we would add an extra fput for file * and cause general protection fault ultimately. */ fput(vma->vm_file); vm_area_free(vma); vma = merge; /* Update vm_flags to pick up the change. */ vm_flags = vma->vm_flags; goto unmap_writable; } } vm_flags = vma->vm_flags; } else if (vm_flags & VM_SHARED) { error = shmem_zero_setup(vma); if (error) goto free_vma; } else { vma_set_anonymous(vma); } /* Allow architectures to sanity-check the vm_flags */ if (!arch_validate_flags(vma->vm_flags)) { error = -EINVAL; if (file) goto unmap_and_free_vma; else goto free_vma; } vma_link(mm, vma, prev, rb_link, rb_parent); /* Once vma denies write, undo our temporary denial count */ if (file) { unmap_writable: if (vm_flags & VM_SHARED) mapping_unmap_writable(file->f_mapping); if (vm_flags & VM_DENYWRITE) allow_write_access(file); } file = vma->vm_file; out: perf_event_mmap(vma); vm_stat_account(mm, vm_flags, len >> PAGE_SHIFT); if (vm_flags & VM_LOCKED) { if ((vm_flags & VM_SPECIAL) || vma_is_dax(vma) || is_vm_hugetlb_page(vma) || vma == get_gate_vma(current->mm)) vma->vm_flags &= VM_LOCKED_CLEAR_MASK; else mm->locked_vm += (len >> PAGE_SHIFT); } if (file) uprobe_mmap(vma); /* * New (or expanded) vma always get soft dirty status. * Otherwise user-space soft-dirty page tracker won't * be able to distinguish situation when vma area unmapped, * then new mapped in-place (which must be aimed as * a completely new data area). */ vma->vm_flags |= VM_SOFTDIRTY; vma_set_page_prot(vma); return addr; unmap_and_free_vma: vma->vm_file = NULL; fput(file); /* Undo any partial mapping done by a device driver. */ unmap_region(mm, vma, prev, vma->vm_start, vma->vm_end); charged = 0; if (vm_flags & VM_SHARED) mapping_unmap_writable(file->f_mapping); allow_write_and_free_vma: if (vm_flags & VM_DENYWRITE) allow_write_access(file); free_vma: vm_area_free(vma); unacct_error: if (charged) vm_unacct_memory(charged); return error; } static unsigned long unmapped_area(struct vm_unmapped_area_info *info) { /* * We implement the search by looking for an rbtree node that * immediately follows a suitable gap. That is, * - gap_start = vma->vm_prev->vm_end <= info->high_limit - length; * - gap_end = vma->vm_start >= info->low_limit + length; * - gap_end - gap_start >= length */ struct mm_struct *mm = current->mm; struct vm_area_struct *vma; unsigned long length, low_limit, high_limit, gap_start, gap_end; /* Adjust search length to account for worst case alignment overhead */ length = info->length + info->align_mask; if (length < info->length) return -ENOMEM; /* Adjust search limits by the desired length */ if (info->high_limit < length) return -ENOMEM; high_limit = info->high_limit - length; if (info->low_limit > high_limit) return -ENOMEM; low_limit = info->low_limit + length; /* Check if rbtree root looks promising */ if (RB_EMPTY_ROOT(&mm->mm_rb)) goto check_highest; vma = rb_entry(mm->mm_rb.rb_node, struct vm_area_struct, vm_rb); if (vma->rb_subtree_gap < length) goto check_highest; while (true) { /* Visit left subtree if it looks promising */ gap_end = vm_start_gap(vma); if (gap_end >= low_limit && vma->vm_rb.rb_left) { struct vm_area_struct *left = rb_entry(vma->vm_rb.rb_left, struct vm_area_struct, vm_rb); if (left->rb_subtree_gap >= length) { vma = left; continue; } } gap_start = vma->vm_prev ? vm_end_gap(vma->vm_prev) : 0; check_current: /* Check if current node has a suitable gap */ if (gap_start > high_limit) return -ENOMEM; if (gap_end >= low_limit && gap_end > gap_start && gap_end - gap_start >= length) goto found; /* Visit right subtree if it looks promising */ if (vma->vm_rb.rb_right) { struct vm_area_struct *right = rb_entry(vma->vm_rb.rb_right, struct vm_area_struct, vm_rb); if (right->rb_subtree_gap >= length) { vma = right; continue; } } /* Go back up the rbtree to find next candidate node */ while (true) { struct rb_node *prev = &vma->vm_rb; if (!rb_parent(prev)) goto check_highest; vma = rb_entry(rb_parent(prev), struct vm_area_struct, vm_rb); if (prev == vma->vm_rb.rb_left) { gap_start = vm_end_gap(vma->vm_prev); gap_end = vm_start_gap(vma); goto check_current; } } } check_highest: /* Check highest gap, which does not precede any rbtree node */ gap_start = mm->highest_vm_end; gap_end = ULONG_MAX; /* Only for VM_BUG_ON below */ if (gap_start > high_limit) return -ENOMEM; found: /* We found a suitable gap. Clip it with the original low_limit. */ if (gap_start < info->low_limit) gap_start = info->low_limit; /* Adjust gap address to the desired alignment */ gap_start += (info->align_offset - gap_start) & info->align_mask; VM_BUG_ON(gap_start + info->length > info->high_limit); VM_BUG_ON(gap_start + info->length > gap_end); return gap_start; } static unsigned long unmapped_area_topdown(struct vm_unmapped_area_info *info) { struct mm_struct *mm = current->mm; struct vm_area_struct *vma; unsigned long length, low_limit, high_limit, gap_start, gap_end; /* Adjust search length to account for worst case alignment overhead */ length = info->length + info->align_mask; if (length < info->length) return -ENOMEM; /* * Adjust search limits by the desired length. * See implementation comment at top of unmapped_area(). */ gap_end = info->high_limit; if (gap_end < length) return -ENOMEM; high_limit = gap_end - length; if (info->low_limit > high_limit) return -ENOMEM; low_limit = info->low_limit + length; /* Check highest gap, which does not precede any rbtree node */ gap_start = mm->highest_vm_end; if (gap_start <= high_limit) goto found_highest; /* Check if rbtree root looks promising */ if (RB_EMPTY_ROOT(&mm->mm_rb)) return -ENOMEM; vma = rb_entry(mm->mm_rb.rb_node, struct vm_area_struct, vm_rb); if (vma->rb_subtree_gap < length) return -ENOMEM; while (true) { /* Visit right subtree if it looks promising */ gap_start = vma->vm_prev ? vm_end_gap(vma->vm_prev) : 0; if (gap_start <= high_limit && vma->vm_rb.rb_right) { struct vm_area_struct *right = rb_entry(vma->vm_rb.rb_right, struct vm_area_struct, vm_rb); if (right->rb_subtree_gap >= length) { vma = right; continue; } } check_current: /* Check if current node has a suitable gap */ gap_end = vm_start_gap(vma); if (gap_end < low_limit) return -ENOMEM; if (gap_start <= high_limit && gap_end > gap_start && gap_end - gap_start >= length) goto found; /* Visit left subtree if it looks promising */ if (vma->vm_rb.rb_left) { struct vm_area_struct *left = rb_entry(vma->vm_rb.rb_left, struct vm_area_struct, vm_rb); if (left->rb_subtree_gap >= length) { vma = left; continue; } } /* Go back up the rbtree to find next candidate node */ while (true) { struct rb_node *prev = &vma->vm_rb; if (!rb_parent(prev)) return -ENOMEM; vma = rb_entry(rb_parent(prev), struct vm_area_struct, vm_rb); if (prev == vma->vm_rb.rb_right) { gap_start = vma->vm_prev ? vm_end_gap(vma->vm_prev) : 0; goto check_current; } } } found: /* We found a suitable gap. Clip it with the original high_limit. */ if (gap_end > info->high_limit) gap_end = info->high_limit; found_highest: /* Compute highest gap address at the desired alignment */ gap_end -= info->length; gap_end -= (gap_end - info->align_offset) & info->align_mask; VM_BUG_ON(gap_end < info->low_limit); VM_BUG_ON(gap_end < gap_start); return gap_end; } /* * Search for an unmapped address range. * * We are looking for a range that: * - does not intersect with any VMA; * - is contained within the [low_limit, high_limit) interval; * - is at least the desired size. * - satisfies (begin_addr & align_mask) == (align_offset & align_mask) */ unsigned long vm_unmapped_area(struct vm_unmapped_area_info *info) { unsigned long addr; if (info->flags & VM_UNMAPPED_AREA_TOPDOWN) addr = unmapped_area_topdown(info); else addr = unmapped_area(info); trace_vm_unmapped_area(addr, info); return addr; } #ifndef arch_get_mmap_end #define arch_get_mmap_end(addr) (TASK_SIZE) #endif #ifndef arch_get_mmap_base #define arch_get_mmap_base(addr, base) (base) #endif /* Get an address range which is currently unmapped. * For shmat() with addr=0. * * Ugly calling convention alert: * Return value with the low bits set means error value, * ie * if (ret & ~PAGE_MASK) * error = ret; * * This function "knows" that -ENOMEM has the bits set. */ #ifndef HAVE_ARCH_UNMAPPED_AREA unsigned long arch_get_unmapped_area(struct file *filp, unsigned long addr, unsigned long len, unsigned long pgoff, unsigned long flags) { struct mm_struct *mm = current->mm; struct vm_area_struct *vma, *prev; struct vm_unmapped_area_info info; const unsigned long mmap_end = arch_get_mmap_end(addr); if (len > mmap_end - mmap_min_addr) return -ENOMEM; if (flags & MAP_FIXED) return addr; if (addr) { addr = PAGE_ALIGN(addr); vma = find_vma_prev(mm, addr, &prev); if (mmap_end - len >= addr && addr >= mmap_min_addr && (!vma || addr + len <= vm_start_gap(vma)) && (!prev || addr >= vm_end_gap(prev))) return addr; } info.flags = 0; info.length = len; info.low_limit = mm->mmap_base; info.high_limit = mmap_end; info.align_mask = 0; info.align_offset = 0; return vm_unmapped_area(&info); } #endif /* * This mmap-allocator allocates new areas top-down from below the * stack's low limit (the base): */ #ifndef HAVE_ARCH_UNMAPPED_AREA_TOPDOWN unsigned long arch_get_unmapped_area_topdown(struct file *filp, unsigned long addr, unsigned long len, unsigned long pgoff, unsigned long flags) { struct vm_area_struct *vma, *prev; struct mm_struct *mm = current->mm; struct vm_unmapped_area_info info; const unsigned long mmap_end = arch_get_mmap_end(addr); /* requested length too big for entire address space */ if (len > mmap_end - mmap_min_addr) return -ENOMEM; if (flags & MAP_FIXED) return addr; /* requesting a specific address */ if (addr) { addr = PAGE_ALIGN(addr); vma = find_vma_prev(mm, addr, &prev); if (mmap_end - len >= addr && addr >= mmap_min_addr && (!vma || addr + len <= vm_start_gap(vma)) && (!prev || addr >= vm_end_gap(prev))) return addr; } info.flags = VM_UNMAPPED_AREA_TOPDOWN; info.length = len; info.low_limit = max(PAGE_SIZE, mmap_min_addr); info.high_limit = arch_get_mmap_base(addr, mm->mmap_base); info.align_mask = 0; info.align_offset = 0; addr = vm_unmapped_area(&info); /* * A failed mmap() very likely causes application failure, * so fall back to the bottom-up function here. This scenario * can happen with large stack limits and large mmap() * allocations. */ if (offset_in_page(addr)) { VM_BUG_ON(addr != -ENOMEM); info.flags = 0; info.low_limit = TASK_UNMAPPED_BASE; info.high_limit = mmap_end; addr = vm_unmapped_area(&info); } return addr; } #endif unsigned long get_unmapped_area(struct file *file, unsigned long addr, unsigned long len, unsigned long pgoff, unsigned long flags) { unsigned long (*get_area)(struct file *, unsigned long, unsigned long, unsigned long, unsigned long); unsigned long error = arch_mmap_check(addr, len, flags); if (error) return error; /* Careful about overflows.. */ if (len > TASK_SIZE) return -ENOMEM; get_area = current->mm->get_unmapped_area; if (file) { if (file->f_op->get_unmapped_area) get_area = file->f_op->get_unmapped_area; } else if (flags & MAP_SHARED) { /* * mmap_region() will call shmem_zero_setup() to create a file, * so use shmem's get_unmapped_area in case it can be huge. * do_mmap() will clear pgoff, so match alignment. */ pgoff = 0; get_area = shmem_get_unmapped_area; } addr = get_area(file, addr, len, pgoff, flags); if (IS_ERR_VALUE(addr)) return addr; if (addr > TASK_SIZE - len) return -ENOMEM; if (offset_in_page(addr)) return -EINVAL; error = security_mmap_addr(addr); return error ? error : addr; } EXPORT_SYMBOL(get_unmapped_area); /* Look up the first VMA which satisfies addr < vm_end, NULL if none. */ struct vm_area_struct *find_vma(struct mm_struct *mm, unsigned long addr) { struct rb_node *rb_node; struct vm_area_struct *vma; /* Check the cache first. */ vma = vmacache_find(mm, addr); if (likely(vma)) return vma; rb_node = mm->mm_rb.rb_node; while (rb_node) { struct vm_area_struct *tmp; tmp = rb_entry(rb_node, struct vm_area_struct, vm_rb); if (tmp->vm_end > addr) { vma = tmp; if (tmp->vm_start <= addr) break; rb_node = rb_node->rb_left; } else rb_node = rb_node->rb_right; } if (vma) vmacache_update(addr, vma); return vma; } EXPORT_SYMBOL(find_vma); /* * Same as find_vma, but also return a pointer to the previous VMA in *pprev. */ struct vm_area_struct * find_vma_prev(struct mm_struct *mm, unsigned long addr, struct vm_area_struct **pprev) { struct vm_area_struct *vma; vma = find_vma(mm, addr); if (vma) { *pprev = vma->vm_prev; } else { struct rb_node *rb_node = rb_last(&mm->mm_rb); *pprev = rb_node ? rb_entry(rb_node, struct vm_area_struct, vm_rb) : NULL; } return vma; } /* * Verify that the stack growth is acceptable and * update accounting. This is shared with both the * grow-up and grow-down cases. */ static int acct_stack_growth(struct vm_area_struct *vma, unsigned long size, unsigned long grow) { struct mm_struct *mm = vma->vm_mm; unsigned long new_start; /* address space limit tests */ if (!may_expand_vm(mm, vma->vm_flags, grow)) return -ENOMEM; /* Stack limit test */ if (size > rlimit(RLIMIT_STACK)) return -ENOMEM; /* mlock limit tests */ if (vma->vm_flags & VM_LOCKED) { unsigned long locked; unsigned long limit; locked = mm->locked_vm + grow; limit = rlimit(RLIMIT_MEMLOCK); limit >>= PAGE_SHIFT; if (locked > limit && !capable(CAP_IPC_LOCK)) return -ENOMEM; } /* Check to ensure the stack will not grow into a hugetlb-only region */ new_start = (vma->vm_flags & VM_GROWSUP) ? vma->vm_start : vma->vm_end - size; if (is_hugepage_only_range(vma->vm_mm, new_start, size)) return -EFAULT; /* * Overcommit.. This must be the final test, as it will * update security statistics. */ if (security_vm_enough_memory_mm(mm, grow)) return -ENOMEM; return 0; } #if defined(CONFIG_STACK_GROWSUP) || defined(CONFIG_IA64) /* * PA-RISC uses this for its stack; IA64 for its Register Backing Store. * vma is the last one with address > vma->vm_end. Have to extend vma. */ int expand_upwards(struct vm_area_struct *vma, unsigned long address) { struct mm_struct *mm = vma->vm_mm; struct vm_area_struct *next; unsigned long gap_addr; int error = 0; if (!(vma->vm_flags & VM_GROWSUP)) return -EFAULT; /* Guard against exceeding limits of the address space. */ address &= PAGE_MASK; if (address >= (TASK_SIZE & PAGE_MASK)) return -ENOMEM; address += PAGE_SIZE; /* Enforce stack_guard_gap */ gap_addr = address + stack_guard_gap; /* Guard against overflow */ if (gap_addr < address || gap_addr > TASK_SIZE) gap_addr = TASK_SIZE; next = vma->vm_next; if (next && next->vm_start < gap_addr && vma_is_accessible(next)) { if (!(next->vm_flags & VM_GROWSUP)) return -ENOMEM; /* Check that both stack segments have the same anon_vma? */ } /* We must make sure the anon_vma is allocated. */ if (unlikely(anon_vma_prepare(vma))) return -ENOMEM; /* * vma->vm_start/vm_end cannot change under us because the caller * is required to hold the mmap_lock in read mode. We need the * anon_vma lock to serialize against concurrent expand_stacks. */ anon_vma_lock_write(vma->anon_vma); /* Somebody else might have raced and expanded it already */ if (address > vma->vm_end) { unsigned long size, grow; size = address - vma->vm_start; grow = (address - vma->vm_end) >> PAGE_SHIFT; error = -ENOMEM; if (vma->vm_pgoff + (size >> PAGE_SHIFT) >= vma->vm_pgoff) { error = acct_stack_growth(vma, size, grow); if (!error) { /* * vma_gap_update() doesn't support concurrent * updates, but we only hold a shared mmap_lock * lock here, so we need to protect against * concurrent vma expansions. * anon_vma_lock_write() doesn't help here, as * we don't guarantee that all growable vmas * in a mm share the same root anon vma. * So, we reuse mm->page_table_lock to guard * against concurrent vma expansions. */ spin_lock(&mm->page_table_lock); if (vma->vm_flags & VM_LOCKED) mm->locked_vm += grow; vm_stat_account(mm, vma->vm_flags, grow); anon_vma_interval_tree_pre_update_vma(vma); vma->vm_end = address; anon_vma_interval_tree_post_update_vma(vma); if (vma->vm_next) vma_gap_update(vma->vm_next); else mm->highest_vm_end = vm_end_gap(vma); spin_unlock(&mm->page_table_lock); perf_event_mmap(vma); } } } anon_vma_unlock_write(vma->anon_vma); khugepaged_enter_vma_merge(vma, vma->vm_flags); validate_mm(mm); return error; } #endif /* CONFIG_STACK_GROWSUP || CONFIG_IA64 */ /* * vma is the first one with address < vma->vm_start. Have to extend vma. */ int expand_downwards(struct vm_area_struct *vma, unsigned long address) { struct mm_struct *mm = vma->vm_mm; struct vm_area_struct *prev; int error = 0; address &= PAGE_MASK; if (address < mmap_min_addr) return -EPERM; /* Enforce stack_guard_gap */ prev = vma->vm_prev; /* Check that both stack segments have the same anon_vma? */ if (prev && !(prev->vm_flags & VM_GROWSDOWN) && vma_is_accessible(prev)) { if (address - prev->vm_end < stack_guard_gap) return -ENOMEM; } /* We must make sure the anon_vma is allocated. */ if (unlikely(anon_vma_prepare(vma))) return -ENOMEM; /* * vma->vm_start/vm_end cannot change under us because the caller * is required to hold the mmap_lock in read mode. We need the * anon_vma lock to serialize against concurrent expand_stacks. */ anon_vma_lock_write(vma->anon_vma); /* Somebody else might have raced and expanded it already */ if (address < vma->vm_start) { unsigned long size, grow; size = vma->vm_end - address; grow = (vma->vm_start - address) >> PAGE_SHIFT; error = -ENOMEM; if (grow <= vma->vm_pgoff) { error = acct_stack_growth(vma, size, grow); if (!error) { /* * vma_gap_update() doesn't support concurrent * updates, but we only hold a shared mmap_lock * lock here, so we need to protect against * concurrent vma expansions. * anon_vma_lock_write() doesn't help here, as * we don't guarantee that all growable vmas * in a mm share the same root anon vma. * So, we reuse mm->page_table_lock to guard * against concurrent vma expansions. */ spin_lock(&mm->page_table_lock); if (vma->vm_flags & VM_LOCKED) mm->locked_vm += grow; vm_stat_account(mm, vma->vm_flags, grow); anon_vma_interval_tree_pre_update_vma(vma); vma->vm_start = address; vma->vm_pgoff -= grow; anon_vma_interval_tree_post_update_vma(vma); vma_gap_update(vma); spin_unlock(&mm->page_table_lock); perf_event_mmap(vma); } } } anon_vma_unlock_write(vma->anon_vma); khugepaged_enter_vma_merge(vma, vma->vm_flags); validate_mm(mm); return error; } /* enforced gap between the expanding stack and other mappings. */ unsigned long stack_guard_gap = 256UL<<PAGE_SHIFT; static int __init cmdline_parse_stack_guard_gap(char *p) { unsigned long val; char *endptr; val = simple_strtoul(p, &endptr, 10); if (!*endptr) stack_guard_gap = val << PAGE_SHIFT; return 0; } __setup("stack_guard_gap=", cmdline_parse_stack_guard_gap); #ifdef CONFIG_STACK_GROWSUP int expand_stack(struct vm_area_struct *vma, unsigned long address) { return expand_upwards(vma, address); } struct vm_area_struct * find_extend_vma(struct mm_struct *mm, unsigned long addr) { struct vm_area_struct *vma, *prev; addr &= PAGE_MASK; vma = find_vma_prev(mm, addr, &prev); if (vma && (vma->vm_start <= addr)) return vma; /* don't alter vm_end if the coredump is running */ if (!prev || expand_stack(prev, addr)) return NULL; if (prev->vm_flags & VM_LOCKED) populate_vma_page_range(prev, addr, prev->vm_end, NULL); return prev; } #else int expand_stack(struct vm_area_struct *vma, unsigned long address) { return expand_downwards(vma, address); } struct vm_area_struct * find_extend_vma(struct mm_struct *mm, unsigned long addr) { struct vm_area_struct *vma; unsigned long start; addr &= PAGE_MASK; vma = find_vma(mm, addr); if (!vma) return NULL; if (vma->vm_start <= addr) return vma; if (!(vma->vm_flags & VM_GROWSDOWN)) return NULL; start = vma->vm_start; if (expand_stack(vma, addr)) return NULL; if (vma->vm_flags & VM_LOCKED) populate_vma_page_range(vma, addr, start, NULL); return vma; } #endif EXPORT_SYMBOL_GPL(find_extend_vma); /* * Ok - we have the memory areas we should free on the vma list, * so release them, and do the vma updates. * * Called with the mm semaphore held. */ static void remove_vma_list(struct mm_struct *mm, struct vm_area_struct *vma) { unsigned long nr_accounted = 0; /* Update high watermark before we lower total_vm */ update_hiwater_vm(mm); do { long nrpages = vma_pages(vma); if (vma->vm_flags & VM_ACCOUNT) nr_accounted += nrpages; vm_stat_account(mm, vma->vm_flags, -nrpages); vma = remove_vma(vma); } while (vma); vm_unacct_memory(nr_accounted); validate_mm(mm); } /* * Get rid of page table information in the indicated region. * * Called with the mm semaphore held. */ static void unmap_region(struct mm_struct *mm, struct vm_area_struct *vma, struct vm_area_struct *prev, unsigned long start, unsigned long end) { struct vm_area_struct *next = vma_next(mm, prev); struct mmu_gather tlb; lru_add_drain(); tlb_gather_mmu(&tlb, mm, start, end); update_hiwater_rss(mm); unmap_vmas(&tlb, vma, start, end); free_pgtables(&tlb, vma, prev ? prev->vm_end : FIRST_USER_ADDRESS, next ? next->vm_start : USER_PGTABLES_CEILING); tlb_finish_mmu(&tlb, start, end); } /* * Create a list of vma's touched by the unmap, removing them from the mm's * vma list as we go.. */ static bool detach_vmas_to_be_unmapped(struct mm_struct *mm, struct vm_area_struct *vma, struct vm_area_struct *prev, unsigned long end) { struct vm_area_struct **insertion_point; struct vm_area_struct *tail_vma = NULL; insertion_point = (prev ? &prev->vm_next : &mm->mmap); vma->vm_prev = NULL; do { vma_rb_erase(vma, &mm->mm_rb); mm->map_count--; tail_vma = vma; vma = vma->vm_next; } while (vma && vma->vm_start < end); *insertion_point = vma; if (vma) { vma->vm_prev = prev; vma_gap_update(vma); } else mm->highest_vm_end = prev ? vm_end_gap(prev) : 0; tail_vma->vm_next = NULL; /* Kill the cache */ vmacache_invalidate(mm); /* * Do not downgrade mmap_lock if we are next to VM_GROWSDOWN or * VM_GROWSUP VMA. Such VMAs can change their size under * down_read(mmap_lock) and collide with the VMA we are about to unmap. */ if (vma && (vma->vm_flags & VM_GROWSDOWN)) return false; if (prev && (prev->vm_flags & VM_GROWSUP)) return false; return true; } /* * __split_vma() bypasses sysctl_max_map_count checking. We use this where it * has already been checked or doesn't make sense to fail. */ int __split_vma(struct mm_struct *mm, struct vm_area_struct *vma, unsigned long addr, int new_below) { struct vm_area_struct *new; int err; if (vma->vm_ops && vma->vm_ops->split) { err = vma->vm_ops->split(vma, addr); if (err) return err; } new = vm_area_dup(vma); if (!new) return -ENOMEM; if (new_below) new->vm_end = addr; else { new->vm_start = addr; new->vm_pgoff += ((addr - vma->vm_start) >> PAGE_SHIFT); } err = vma_dup_policy(vma, new); if (err) goto out_free_vma; err = anon_vma_clone(new, vma); if (err) goto out_free_mpol; if (new->vm_file) get_file(new->vm_file); if (new->vm_ops && new->vm_ops->open) new->vm_ops->open(new); if (new_below) err = vma_adjust(vma, addr, vma->vm_end, vma->vm_pgoff + ((addr - new->vm_start) >> PAGE_SHIFT), new); else err = vma_adjust(vma, vma->vm_start, addr, vma->vm_pgoff, new); /* Success. */ if (!err) return 0; /* Clean everything up if vma_adjust failed. */ if (new->vm_ops && new->vm_ops->close) new->vm_ops->close(new); if (new->vm_file) fput(new->vm_file); unlink_anon_vmas(new); out_free_mpol: mpol_put(vma_policy(new)); out_free_vma: vm_area_free(new); return err; } /* * Split a vma into two pieces at address 'addr', a new vma is allocated * either for the first part or the tail. */ int split_vma(struct mm_struct *mm, struct vm_area_struct *vma, unsigned long addr, int new_below) { if (mm->map_count >= sysctl_max_map_count) return -ENOMEM; return __split_vma(mm, vma, addr, new_below); } /* Munmap is split into 2 main parts -- this part which finds * what needs doing, and the areas themselves, which do the * work. This now handles partial unmappings. * Jeremy Fitzhardinge <jeremy@goop.org> */ int __do_munmap(struct mm_struct *mm, unsigned long start, size_t len, struct list_head *uf, bool downgrade) { unsigned long end; struct vm_area_struct *vma, *prev, *last; if ((offset_in_page(start)) || start > TASK_SIZE || len > TASK_SIZE-start) return -EINVAL; len = PAGE_ALIGN(len); end = start + len; if (len == 0) return -EINVAL; /* * arch_unmap() might do unmaps itself. It must be called * and finish any rbtree manipulation before this code * runs and also starts to manipulate the rbtree. */ arch_unmap(mm, start, end); /* Find the first overlapping VMA */ vma = find_vma(mm, start); if (!vma) return 0; prev = vma->vm_prev; /* we have start < vma->vm_end */ /* if it doesn't overlap, we have nothing.. */ if (vma->vm_start >= end) return 0; /* * If we need to split any vma, do it now to save pain later. * * Note: mremap's move_vma VM_ACCOUNT handling assumes a partially * unmapped vm_area_struct will remain in use: so lower split_vma * places tmp vma above, and higher split_vma places tmp vma below. */ if (start > vma->vm_start) { int error; /* * Make sure that map_count on return from munmap() will * not exceed its limit; but let map_count go just above * its limit temporarily, to help free resources as expected. */ if (end < vma->vm_end && mm->map_count >= sysctl_max_map_count) return -ENOMEM; error = __split_vma(mm, vma, start, 0); if (error) return error; prev = vma; } /* Does it split the last one? */ last = find_vma(mm, end); if (last && end > last->vm_start) { int error = __split_vma(mm, last, end, 1); if (error) return error; } vma = vma_next(mm, prev); if (unlikely(uf)) { /* * If userfaultfd_unmap_prep returns an error the vmas * will remain splitted, but userland will get a * highly unexpected error anyway. This is no * different than the case where the first of the two * __split_vma fails, but we don't undo the first * split, despite we could. This is unlikely enough * failure that it's not worth optimizing it for. */ int error = userfaultfd_unmap_prep(vma, start, end, uf); if (error) return error; } /* * unlock any mlock()ed ranges before detaching vmas */ if (mm->locked_vm) { struct vm_area_struct *tmp = vma; while (tmp && tmp->vm_start < end) { if (tmp->vm_flags & VM_LOCKED) { mm->locked_vm -= vma_pages(tmp); munlock_vma_pages_all(tmp); } tmp = tmp->vm_next; } } /* Detach vmas from rbtree */ if (!detach_vmas_to_be_unmapped(mm, vma, prev, end)) downgrade = false; if (downgrade) mmap_write_downgrade(mm); unmap_region(mm, vma, prev, start, end); /* Fix up all other VM information */ remove_vma_list(mm, vma); return downgrade ? 1 : 0; } int do_munmap(struct mm_struct *mm, unsigned long start, size_t len, struct list_head *uf) { return __do_munmap(mm, start, len, uf, false); } static int __vm_munmap(unsigned long start, size_t len, bool downgrade) { int ret; struct mm_struct *mm = current->mm; LIST_HEAD(uf); if (mmap_write_lock_killable(mm)) return -EINTR; ret = __do_munmap(mm, start, len, &uf, downgrade); /* * Returning 1 indicates mmap_lock is downgraded. * But 1 is not legal return value of vm_munmap() and munmap(), reset * it to 0 before return. */ if (ret == 1) { mmap_read_unlock(mm); ret = 0; } else mmap_write_unlock(mm); userfaultfd_unmap_complete(mm, &uf); return ret; } int vm_munmap(unsigned long start, size_t len) { return __vm_munmap(start, len, false); } EXPORT_SYMBOL(vm_munmap); SYSCALL_DEFINE2(munmap, unsigned long, addr, size_t, len) { addr = untagged_addr(addr); profile_munmap(addr); return __vm_munmap(addr, len, true); } /* * Emulation of deprecated remap_file_pages() syscall. */ SYSCALL_DEFINE5(remap_file_pages, unsigned long, start, unsigned long, size, unsigned long, prot, unsigned long, pgoff, unsigned long, flags) { struct mm_struct *mm = current->mm; struct vm_area_struct *vma; unsigned long populate = 0; unsigned long ret = -EINVAL; struct file *file; pr_warn_once("%s (%d) uses deprecated remap_file_pages() syscall. See Documentation/vm/remap_file_pages.rst.\n", current->comm, current->pid); if (prot) return ret; start = start & PAGE_MASK; size = size & PAGE_MASK; if (start + size <= start) return ret; /* Does pgoff wrap? */ if (pgoff + (size >> PAGE_SHIFT) < pgoff) return ret; if (mmap_write_lock_killable(mm)) return -EINTR; vma = find_vma(mm, start); if (!vma || !(vma->vm_flags & VM_SHARED)) goto out; if (start < vma->vm_start) goto out; if (start + size > vma->vm_end) { struct vm_area_struct *next; for (next = vma->vm_next; next; next = next->vm_next) { /* hole between vmas ? */ if (next->vm_start != next->vm_prev->vm_end) goto out; if (next->vm_file != vma->vm_file) goto out; if (next->vm_flags != vma->vm_flags) goto out; if (start + size <= next->vm_end) break; } if (!next) goto out; } prot |= vma->vm_flags & VM_READ ? PROT_READ : 0; prot |= vma->vm_flags & VM_WRITE ? PROT_WRITE : 0; prot |= vma->vm_flags & VM_EXEC ? PROT_EXEC : 0; flags &= MAP_NONBLOCK; flags |= MAP_SHARED | MAP_FIXED | MAP_POPULATE; if (vma->vm_flags & VM_LOCKED) { struct vm_area_struct *tmp; flags |= MAP_LOCKED; /* drop PG_Mlocked flag for over-mapped range */ for (tmp = vma; tmp->vm_start >= start + size; tmp = tmp->vm_next) { /* * Split pmd and munlock page on the border * of the range. */ vma_adjust_trans_huge(tmp, start, start + size, 0); munlock_vma_pages_range(tmp, max(tmp->vm_start, start), min(tmp->vm_end, start + size)); } } file = get_file(vma->vm_file); ret = do_mmap(vma->vm_file, start, size, prot, flags, pgoff, &populate, NULL); fput(file); out: mmap_write_unlock(mm); if (populate) mm_populate(ret, populate); if (!IS_ERR_VALUE(ret)) ret = 0; return ret; } /* * this is really a simplified "do_mmap". it only handles * anonymous maps. eventually we may be able to do some * brk-specific accounting here. */ static int do_brk_flags(unsigned long addr, unsigned long len, unsigned long flags, struct list_head *uf) { struct mm_struct *mm = current->mm; struct vm_area_struct *vma, *prev; struct rb_node **rb_link, *rb_parent; pgoff_t pgoff = addr >> PAGE_SHIFT; int error; unsigned long mapped_addr; /* Until we need other flags, refuse anything except VM_EXEC. */ if ((flags & (~VM_EXEC)) != 0) return -EINVAL; flags |= VM_DATA_DEFAULT_FLAGS | VM_ACCOUNT | mm->def_flags; mapped_addr = get_unmapped_area(NULL, addr, len, 0, MAP_FIXED); if (IS_ERR_VALUE(mapped_addr)) return mapped_addr; error = mlock_future_check(mm, mm->def_flags, len); if (error) return error; /* Clear old maps, set up prev, rb_link, rb_parent, and uf */ if (munmap_vma_range(mm, addr, len, &prev, &rb_link, &rb_parent, uf)) return -ENOMEM; /* Check against address space limits *after* clearing old maps... */ if (!may_expand_vm(mm, flags, len >> PAGE_SHIFT)) return -ENOMEM; if (mm->map_count > sysctl_max_map_count) return -ENOMEM; if (security_vm_enough_memory_mm(mm, len >> PAGE_SHIFT)) return -ENOMEM; /* Can we just expand an old private anonymous mapping? */ vma = vma_merge(mm, prev, addr, addr + len, flags, NULL, NULL, pgoff, NULL, NULL_VM_UFFD_CTX); if (vma) goto out; /* * create a vma struct for an anonymous mapping */ vma = vm_area_alloc(mm); if (!vma) { vm_unacct_memory(len >> PAGE_SHIFT); return -ENOMEM; } vma_set_anonymous(vma); vma->vm_start = addr; vma->vm_end = addr + len; vma->vm_pgoff = pgoff; vma->vm_flags = flags; vma->vm_page_prot = vm_get_page_prot(flags); vma_link(mm, vma, prev, rb_link, rb_parent); out: perf_event_mmap(vma); mm->total_vm += len >> PAGE_SHIFT; mm->data_vm += len >> PAGE_SHIFT; if (flags & VM_LOCKED) mm->locked_vm += (len >> PAGE_SHIFT); vma->vm_flags |= VM_SOFTDIRTY; return 0; } int vm_brk_flags(unsigned long addr, unsigned long request, unsigned long flags) { struct mm_struct *mm = current->mm; unsigned long len; int ret; bool populate; LIST_HEAD(uf); len = PAGE_ALIGN(request); if (len < request) return -ENOMEM; if (!len) return 0; if (mmap_write_lock_killable(mm)) return -EINTR; ret = do_brk_flags(addr, len, flags, &uf); populate = ((mm->def_flags & VM_LOCKED) != 0); mmap_write_unlock(mm); userfaultfd_unmap_complete(mm, &uf); if (populate && !ret) mm_populate(addr, len); return ret; } EXPORT_SYMBOL(vm_brk_flags); int vm_brk(unsigned long addr, unsigned long len) { return vm_brk_flags(addr, len, 0); } EXPORT_SYMBOL(vm_brk); /* Release all mmaps. */ void exit_mmap(struct mm_struct *mm) { struct mmu_gather tlb; struct vm_area_struct *vma; unsigned long nr_accounted = 0; /* mm's last user has gone, and its about to be pulled down */ mmu_notifier_release(mm); if (unlikely(mm_is_oom_victim(mm))) { /* * Manually reap the mm to free as much memory as possible. * Then, as the oom reaper does, set MMF_OOM_SKIP to disregard * this mm from further consideration. Taking mm->mmap_lock for * write after setting MMF_OOM_SKIP will guarantee that the oom * reaper will not run on this mm again after mmap_lock is * dropped. * * Nothing can be holding mm->mmap_lock here and the above call * to mmu_notifier_release(mm) ensures mmu notifier callbacks in * __oom_reap_task_mm() will not block. * * This needs to be done before calling munlock_vma_pages_all(), * which clears VM_LOCKED, otherwise the oom reaper cannot * reliably test it. */ (void)__oom_reap_task_mm(mm); set_bit(MMF_OOM_SKIP, &mm->flags); mmap_write_lock(mm); mmap_write_unlock(mm); } if (mm->locked_vm) { vma = mm->mmap; while (vma) { if (vma->vm_flags & VM_LOCKED) munlock_vma_pages_all(vma); vma = vma->vm_next; } } arch_exit_mmap(mm); vma = mm->mmap; if (!vma) /* Can happen if dup_mmap() received an OOM */ return; lru_add_drain(); flush_cache_mm(mm); tlb_gather_mmu(&tlb, mm, 0, -1); /* update_hiwater_rss(mm) here? but nobody should be looking */ /* Use -1 here to ensure all VMAs in the mm are unmapped */ unmap_vmas(&tlb, vma, 0, -1); free_pgtables(&tlb, vma, FIRST_USER_ADDRESS, USER_PGTABLES_CEILING); tlb_finish_mmu(&tlb, 0, -1); /* * Walk the list again, actually closing and freeing it, * with preemption enabled, without holding any MM locks. */ while (vma) { if (vma->vm_flags & VM_ACCOUNT) nr_accounted += vma_pages(vma); vma = remove_vma(vma); cond_resched(); } vm_unacct_memory(nr_accounted); } /* Insert vm structure into process list sorted by address * and into the inode's i_mmap tree. If vm_file is non-NULL * then i_mmap_rwsem is taken here. */ int insert_vm_struct(struct mm_struct *mm, struct vm_area_struct *vma) { struct vm_area_struct *prev; struct rb_node **rb_link, *rb_parent; if (find_vma_links(mm, vma->vm_start, vma->vm_end, &prev, &rb_link, &rb_parent)) return -ENOMEM; if ((vma->vm_flags & VM_ACCOUNT) && security_vm_enough_memory_mm(mm, vma_pages(vma))) return -ENOMEM; /* * The vm_pgoff of a purely anonymous vma should be irrelevant * until its first write fault, when page's anon_vma and index * are set. But now set the vm_pgoff it will almost certainly * end up with (unless mremap moves it elsewhere before that * first wfault), so /proc/pid/maps tells a consistent story. * * By setting it to reflect the virtual start address of the * vma, merges and splits can happen in a seamless way, just * using the existing file pgoff checks and manipulations. * Similarly in do_mmap and in do_brk_flags. */ if (vma_is_anonymous(vma)) { BUG_ON(vma->anon_vma); vma->vm_pgoff = vma->vm_start >> PAGE_SHIFT; } vma_link(mm, vma, prev, rb_link, rb_parent); return 0; } /* * Copy the vma structure to a new location in the same mm, * prior to moving page table entries, to effect an mremap move. */ struct vm_area_struct *copy_vma(struct vm_area_struct **vmap, unsigned long addr, unsigned long len, pgoff_t pgoff, bool *need_rmap_locks) { struct vm_area_struct *vma = *vmap; unsigned long vma_start = vma->vm_start; struct mm_struct *mm = vma->vm_mm; struct vm_area_struct *new_vma, *prev; struct rb_node **rb_link, *rb_parent; bool faulted_in_anon_vma = true; /* * If anonymous vma has not yet been faulted, update new pgoff * to match new location, to increase its chance of merging. */ if (unlikely(vma_is_anonymous(vma) && !vma->anon_vma)) { pgoff = addr >> PAGE_SHIFT; faulted_in_anon_vma = false; } if (find_vma_links(mm, addr, addr + len, &prev, &rb_link, &rb_parent)) return NULL; /* should never get here */ new_vma = vma_merge(mm, prev, addr, addr + len, vma->vm_flags, vma->anon_vma, vma->vm_file, pgoff, vma_policy(vma), vma->vm_userfaultfd_ctx); if (new_vma) { /* * Source vma may have been merged into new_vma */ if (unlikely(vma_start >= new_vma->vm_start && vma_start < new_vma->vm_end)) { /* * The only way we can get a vma_merge with * self during an mremap is if the vma hasn't * been faulted in yet and we were allowed to * reset the dst vma->vm_pgoff to the * destination address of the mremap to allow * the merge to happen. mremap must change the * vm_pgoff linearity between src and dst vmas * (in turn preventing a vma_merge) to be * safe. It is only safe to keep the vm_pgoff * linear if there are no pages mapped yet. */ VM_BUG_ON_VMA(faulted_in_anon_vma, new_vma); *vmap = vma = new_vma; } *need_rmap_locks = (new_vma->vm_pgoff <= vma->vm_pgoff); } else { new_vma = vm_area_dup(vma); if (!new_vma) goto out; new_vma->vm_start = addr; new_vma->vm_end = addr + len; new_vma->vm_pgoff = pgoff; if (vma_dup_policy(vma, new_vma)) goto out_free_vma; if (anon_vma_clone(new_vma, vma)) goto out_free_mempol; if (new_vma->vm_file) get_file(new_vma->vm_file); if (new_vma->vm_ops && new_vma->vm_ops->open) new_vma->vm_ops->open(new_vma); vma_link(mm, new_vma, prev, rb_link, rb_parent); *need_rmap_locks = false; } return new_vma; out_free_mempol: mpol_put(vma_policy(new_vma)); out_free_vma: vm_area_free(new_vma); out: return NULL; } /* * Return true if the calling process may expand its vm space by the passed * number of pages */ bool may_expand_vm(struct mm_struct *mm, vm_flags_t flags, unsigned long npages) { if (mm->total_vm + npages > rlimit(RLIMIT_AS) >> PAGE_SHIFT) return false; if (is_data_mapping(flags) && mm->data_vm + npages > rlimit(RLIMIT_DATA) >> PAGE_SHIFT) { /* Workaround for Valgrind */ if (rlimit(RLIMIT_DATA) == 0 && mm->data_vm + npages <= rlimit_max(RLIMIT_DATA) >> PAGE_SHIFT) return true; pr_warn_once("%s (%d): VmData %lu exceed data ulimit %lu. Update limits%s.\n", current->comm, current->pid, (mm->data_vm + npages) << PAGE_SHIFT, rlimit(RLIMIT_DATA), ignore_rlimit_data ? "" : " or use boot option ignore_rlimit_data"); if (!ignore_rlimit_data) return false; } return true; } void vm_stat_account(struct mm_struct *mm, vm_flags_t flags, long npages) { mm->total_vm += npages; if (is_exec_mapping(flags)) mm->exec_vm += npages; else if (is_stack_mapping(flags)) mm->stack_vm += npages; else if (is_data_mapping(flags)) mm->data_vm += npages; } static vm_fault_t special_mapping_fault(struct vm_fault *vmf); /* * Having a close hook prevents vma merging regardless of flags. */ static void special_mapping_close(struct vm_area_struct *vma) { } static const char *special_mapping_name(struct vm_area_struct *vma) { return ((struct vm_special_mapping *)vma->vm_private_data)->name; } static int special_mapping_mremap(struct vm_area_struct *new_vma) { struct vm_special_mapping *sm = new_vma->vm_private_data; if (WARN_ON_ONCE(current->mm != new_vma->vm_mm)) return -EFAULT; if (sm->mremap) return sm->mremap(sm, new_vma); return 0; } static const struct vm_operations_struct special_mapping_vmops = { .close = special_mapping_close, .fault = special_mapping_fault, .mremap = special_mapping_mremap, .name = special_mapping_name, /* vDSO code relies that VVAR can't be accessed remotely */ .access = NULL, }; static const struct vm_operations_struct legacy_special_mapping_vmops = { .close = special_mapping_close, .fault = special_mapping_fault, }; static vm_fault_t special_mapping_fault(struct vm_fault *vmf) { struct vm_area_struct *vma = vmf->vma; pgoff_t pgoff; struct page **pages; if (vma->vm_ops == &legacy_special_mapping_vmops) { pages = vma->vm_private_data; } else { struct vm_special_mapping *sm = vma->vm_private_data; if (sm->fault) return sm->fault(sm, vmf->vma, vmf); pages = sm->pages; } for (pgoff = vmf->pgoff; pgoff && *pages; ++pages) pgoff--; if (*pages) { struct page *page = *pages; get_page(page); vmf->page = page; return 0; } return VM_FAULT_SIGBUS; } static struct vm_area_struct *__install_special_mapping( struct mm_struct *mm, unsigned long addr, unsigned long len, unsigned long vm_flags, void *priv, const struct vm_operations_struct *ops) { int ret; struct vm_area_struct *vma; vma = vm_area_alloc(mm); if (unlikely(vma == NULL)) return ERR_PTR(-ENOMEM); vma->vm_start = addr; vma->vm_end = addr + len; vma->vm_flags = vm_flags | mm->def_flags | VM_DONTEXPAND | VM_SOFTDIRTY; vma->vm_page_prot = vm_get_page_prot(vma->vm_flags); vma->vm_ops = ops; vma->vm_private_data = priv; ret = insert_vm_struct(mm, vma); if (ret) goto out; vm_stat_account(mm, vma->vm_flags, len >> PAGE_SHIFT); perf_event_mmap(vma); return vma; out: vm_area_free(vma); return ERR_PTR(ret); } bool vma_is_special_mapping(const struct vm_area_struct *vma, const struct vm_special_mapping *sm) { return vma->vm_private_data == sm && (vma->vm_ops == &special_mapping_vmops || vma->vm_ops == &legacy_special_mapping_vmops); } /* * Called with mm->mmap_lock held for writing. * Insert a new vma covering the given region, with the given flags. * Its pages are supplied by the given array of struct page *. * The array can be shorter than len >> PAGE_SHIFT if it's null-terminated. * The region past the last page supplied will always produce SIGBUS. * The array pointer and the pages it points to are assumed to stay alive * for as long as this mapping might exist. */ struct vm_area_struct *_install_special_mapping( struct mm_struct *mm, unsigned long addr, unsigned long len, unsigned long vm_flags, const struct vm_special_mapping *spec) { return __install_special_mapping(mm, addr, len, vm_flags, (void *)spec, &special_mapping_vmops); } int install_special_mapping(struct mm_struct *mm, unsigned long addr, unsigned long len, unsigned long vm_flags, struct page **pages) { struct vm_area_struct *vma = __install_special_mapping( mm, addr, len, vm_flags, (void *)pages, &legacy_special_mapping_vmops); return PTR_ERR_OR_ZERO(vma); } static DEFINE_MUTEX(mm_all_locks_mutex); static void vm_lock_anon_vma(struct mm_struct *mm, struct anon_vma *anon_vma) { if (!test_bit(0, (unsigned long *) &anon_vma->root->rb_root.rb_root.rb_node)) { /* * The LSB of head.next can't change from under us * because we hold the mm_all_locks_mutex. */ down_write_nest_lock(&anon_vma->root->rwsem, &mm->mmap_lock); /* * We can safely modify head.next after taking the * anon_vma->root->rwsem. If some other vma in this mm shares * the same anon_vma we won't take it again. * * No need of atomic instructions here, head.next * can't change from under us thanks to the * anon_vma->root->rwsem. */ if (__test_and_set_bit(0, (unsigned long *) &anon_vma->root->rb_root.rb_root.rb_node)) BUG(); } } static void vm_lock_mapping(struct mm_struct *mm, struct address_space *mapping) { if (!test_bit(AS_MM_ALL_LOCKS, &mapping->flags)) { /* * AS_MM_ALL_LOCKS can't change from under us because * we hold the mm_all_locks_mutex. * * Operations on ->flags have to be atomic because * even if AS_MM_ALL_LOCKS is stable thanks to the * mm_all_locks_mutex, there may be other cpus * changing other bitflags in parallel to us. */ if (test_and_set_bit(AS_MM_ALL_LOCKS, &mapping->flags)) BUG(); down_write_nest_lock(&mapping->i_mmap_rwsem, &mm->mmap_lock); } } /* * This operation locks against the VM for all pte/vma/mm related * operations that could ever happen on a certain mm. This includes * vmtruncate, try_to_unmap, and all page faults. * * The caller must take the mmap_lock in write mode before calling * mm_take_all_locks(). The caller isn't allowed to release the * mmap_lock until mm_drop_all_locks() returns. * * mmap_lock in write mode is required in order to block all operations * that could modify pagetables and free pages without need of * altering the vma layout. It's also needed in write mode to avoid new * anon_vmas to be associated with existing vmas. * * A single task can't take more than one mm_take_all_locks() in a row * or it would deadlock. * * The LSB in anon_vma->rb_root.rb_node and the AS_MM_ALL_LOCKS bitflag in * mapping->flags avoid to take the same lock twice, if more than one * vma in this mm is backed by the same anon_vma or address_space. * * We take locks in following order, accordingly to comment at beginning * of mm/rmap.c: * - all hugetlbfs_i_mmap_rwsem_key locks (aka mapping->i_mmap_rwsem for * hugetlb mapping); * - all i_mmap_rwsem locks; * - all anon_vma->rwseml * * We can take all locks within these types randomly because the VM code * doesn't nest them and we protected from parallel mm_take_all_locks() by * mm_all_locks_mutex. * * mm_take_all_locks() and mm_drop_all_locks are expensive operations * that may have to take thousand of locks. * * mm_take_all_locks() can fail if it's interrupted by signals. */ int mm_take_all_locks(struct mm_struct *mm) { struct vm_area_struct *vma; struct anon_vma_chain *avc; BUG_ON(mmap_read_trylock(mm)); mutex_lock(&mm_all_locks_mutex); for (vma = mm->mmap; vma; vma = vma->vm_next) { if (signal_pending(current)) goto out_unlock; if (vma->vm_file && vma->vm_file->f_mapping && is_vm_hugetlb_page(vma)) vm_lock_mapping(mm, vma->vm_file->f_mapping); } for (vma = mm->mmap; vma; vma = vma->vm_next) { if (signal_pending(current)) goto out_unlock; if (vma->vm_file && vma->vm_file->f_mapping && !is_vm_hugetlb_page(vma)) vm_lock_mapping(mm, vma->vm_file->f_mapping); } for (vma = mm->mmap; vma; vma = vma->vm_next) { if (signal_pending(current)) goto out_unlock; if (vma->anon_vma) list_for_each_entry(avc, &vma->anon_vma_chain, same_vma) vm_lock_anon_vma(mm, avc->anon_vma); } return 0; out_unlock: mm_drop_all_locks(mm); return -EINTR; } static void vm_unlock_anon_vma(struct anon_vma *anon_vma) { if (test_bit(0, (unsigned long *) &anon_vma->root->rb_root.rb_root.rb_node)) { /* * The LSB of head.next can't change to 0 from under * us because we hold the mm_all_locks_mutex. * * We must however clear the bitflag before unlocking * the vma so the users using the anon_vma->rb_root will * never see our bitflag. * * No need of atomic instructions here, head.next * can't change from under us until we release the * anon_vma->root->rwsem. */ if (!__test_and_clear_bit(0, (unsigned long *) &anon_vma->root->rb_root.rb_root.rb_node)) BUG(); anon_vma_unlock_write(anon_vma); } } static void vm_unlock_mapping(struct address_space *mapping) { if (test_bit(AS_MM_ALL_LOCKS, &mapping->flags)) { /* * AS_MM_ALL_LOCKS can't change to 0 from under us * because we hold the mm_all_locks_mutex. */ i_mmap_unlock_write(mapping); if (!test_and_clear_bit(AS_MM_ALL_LOCKS, &mapping->flags)) BUG(); } } /* * The mmap_lock cannot be released by the caller until * mm_drop_all_locks() returns. */ void mm_drop_all_locks(struct mm_struct *mm) { struct vm_area_struct *vma; struct anon_vma_chain *avc; BUG_ON(mmap_read_trylock(mm)); BUG_ON(!mutex_is_locked(&mm_all_locks_mutex)); for (vma = mm->mmap; vma; vma = vma->vm_next) { if (vma->anon_vma) list_for_each_entry(avc, &vma->anon_vma_chain, same_vma) vm_unlock_anon_vma(avc->anon_vma); if (vma->vm_file && vma->vm_file->f_mapping) vm_unlock_mapping(vma->vm_file->f_mapping); } mutex_unlock(&mm_all_locks_mutex); } /* * initialise the percpu counter for VM */ void __init mmap_init(void) { int ret; ret = percpu_counter_init(&vm_committed_as, 0, GFP_KERNEL); VM_BUG_ON(ret); } /* * Initialise sysctl_user_reserve_kbytes. * * This is intended to prevent a user from starting a single memory hogging * process, such that they cannot recover (kill the hog) in OVERCOMMIT_NEVER * mode. * * The default value is min(3% of free memory, 128MB) * 128MB is enough to recover with sshd/login, bash, and top/kill. */ static int init_user_reserve(void) { unsigned long free_kbytes; free_kbytes = global_zone_page_state(NR_FREE_PAGES) << (PAGE_SHIFT - 10); sysctl_user_reserve_kbytes = min(free_kbytes / 32, 1UL << 17); return 0; } subsys_initcall(init_user_reserve); /* * Initialise sysctl_admin_reserve_kbytes. * * The purpose of sysctl_admin_reserve_kbytes is to allow the sys admin * to log in and kill a memory hogging process. * * Systems with more than 256MB will reserve 8MB, enough to recover * with sshd, bash, and top in OVERCOMMIT_GUESS. Smaller systems will * only reserve 3% of free pages by default. */ static int init_admin_reserve(void) { unsigned long free_kbytes; free_kbytes = global_zone_page_state(NR_FREE_PAGES) << (PAGE_SHIFT - 10); sysctl_admin_reserve_kbytes = min(free_kbytes / 32, 1UL << 13); return 0; } subsys_initcall(init_admin_reserve); /* * Reinititalise user and admin reserves if memory is added or removed. * * The default user reserve max is 128MB, and the default max for the * admin reserve is 8MB. These are usually, but not always, enough to * enable recovery from a memory hogging process using login/sshd, a shell, * and tools like top. It may make sense to increase or even disable the * reserve depending on the existence of swap or variations in the recovery * tools. So, the admin may have changed them. * * If memory is added and the reserves have been eliminated or increased above * the default max, then we'll trust the admin. * * If memory is removed and there isn't enough free memory, then we * need to reset the reserves. * * Otherwise keep the reserve set by the admin. */ static int reserve_mem_notifier(struct notifier_block *nb, unsigned long action, void *data) { unsigned long tmp, free_kbytes; switch (action) { case MEM_ONLINE: /* Default max is 128MB. Leave alone if modified by operator. */ tmp = sysctl_user_reserve_kbytes; if (0 < tmp && tmp < (1UL << 17)) init_user_reserve(); /* Default max is 8MB. Leave alone if modified by operator. */ tmp = sysctl_admin_reserve_kbytes; if (0 < tmp && tmp < (1UL << 13)) init_admin_reserve(); break; case MEM_OFFLINE: free_kbytes = global_zone_page_state(NR_FREE_PAGES) << (PAGE_SHIFT - 10); if (sysctl_user_reserve_kbytes > free_kbytes) { init_user_reserve(); pr_info("vm.user_reserve_kbytes reset to %lu\n", sysctl_user_reserve_kbytes); } if (sysctl_admin_reserve_kbytes > free_kbytes) { init_admin_reserve(); pr_info("vm.admin_reserve_kbytes reset to %lu\n", sysctl_admin_reserve_kbytes); } break; default: break; } return NOTIFY_OK; } static struct notifier_block reserve_mem_nb = { .notifier_call = reserve_mem_notifier, }; static int __meminit init_reserve_notifier(void) { if (register_hotmemory_notifier(&reserve_mem_nb)) pr_err("Failed registering memory add/remove notifier for admin reserve\n"); return 0; } subsys_initcall(init_reserve_notifier);
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 /* SPDX-License-Identifier: GPL-2.0 */ #ifndef _LINUX_JUMP_LABEL_H #define _LINUX_JUMP_LABEL_H /* * Jump label support * * Copyright (C) 2009-2012 Jason Baron <jbaron@redhat.com> * Copyright (C) 2011-2012 Red Hat, Inc., Peter Zijlstra * * DEPRECATED API: * * The use of 'struct static_key' directly, is now DEPRECATED. In addition * static_key_{true,false}() is also DEPRECATED. IE DO NOT use the following: * * struct static_key false = STATIC_KEY_INIT_FALSE; * struct static_key true = STATIC_KEY_INIT_TRUE; * static_key_true() * static_key_false() * * The updated API replacements are: * * DEFINE_STATIC_KEY_TRUE(key); * DEFINE_STATIC_KEY_FALSE(key); * DEFINE_STATIC_KEY_ARRAY_TRUE(keys, count); * DEFINE_STATIC_KEY_ARRAY_FALSE(keys, count); * static_branch_likely() * static_branch_unlikely() * * Jump labels provide an interface to generate dynamic branches using * self-modifying code. Assuming toolchain and architecture support, if we * define a "key" that is initially false via "DEFINE_STATIC_KEY_FALSE(key)", * an "if (static_branch_unlikely(&key))" statement is an unconditional branch * (which defaults to false - and the true block is placed out of line). * Similarly, we can define an initially true key via * "DEFINE_STATIC_KEY_TRUE(key)", and use it in the same * "if (static_branch_unlikely(&key))", in which case we will generate an * unconditional branch to the out-of-line true branch. Keys that are * initially true or false can be using in both static_branch_unlikely() * and static_branch_likely() statements. * * At runtime we can change the branch target by setting the key * to true via a call to static_branch_enable(), or false using * static_branch_disable(). If the direction of the branch is switched by * these calls then we run-time modify the branch target via a * no-op -> jump or jump -> no-op conversion. For example, for an * initially false key that is used in an "if (static_branch_unlikely(&key))" * statement, setting the key to true requires us to patch in a jump * to the out-of-line of true branch. * * In addition to static_branch_{enable,disable}, we can also reference count * the key or branch direction via static_branch_{inc,dec}. Thus, * static_branch_inc() can be thought of as a 'make more true' and * static_branch_dec() as a 'make more false'. * * Since this relies on modifying code, the branch modifying functions * must be considered absolute slow paths (machine wide synchronization etc.). * OTOH, since the affected branches are unconditional, their runtime overhead * will be absolutely minimal, esp. in the default (off) case where the total * effect is a single NOP of appropriate size. The on case will patch in a jump * to the out-of-line block. * * When the control is directly exposed to userspace, it is prudent to delay the * decrement to avoid high frequency code modifications which can (and do) * cause significant performance degradation. Struct static_key_deferred and * static_key_slow_dec_deferred() provide for this. * * Lacking toolchain and or architecture support, static keys fall back to a * simple conditional branch. * * Additional babbling in: Documentation/staging/static-keys.rst */ #ifndef __ASSEMBLY__ #include <linux/types.h> #include <linux/compiler.h> extern bool static_key_initialized; #define STATIC_KEY_CHECK_USE(key) WARN(!static_key_initialized, \ "%s(): static key '%pS' used before call to jump_label_init()", \ __func__, (key)) #ifdef CONFIG_JUMP_LABEL struct static_key { atomic_t enabled; /* * Note: * To make anonymous unions work with old compilers, the static * initialization of them requires brackets. This creates a dependency * on the order of the struct with the initializers. If any fields * are added, STATIC_KEY_INIT_TRUE and STATIC_KEY_INIT_FALSE may need * to be modified. * * bit 0 => 1 if key is initially true * 0 if initially false * bit 1 => 1 if points to struct static_key_mod * 0 if points to struct jump_entry */ union { unsigned long type; struct jump_entry *entries; struct static_key_mod *next; }; }; #else struct static_key { atomic_t enabled; }; #endif /* CONFIG_JUMP_LABEL */ #endif /* __ASSEMBLY__ */ #ifdef CONFIG_JUMP_LABEL #include <asm/jump_label.h> #ifndef __ASSEMBLY__ #ifdef CONFIG_HAVE_ARCH_JUMP_LABEL_RELATIVE struct jump_entry { s32 code; s32 target; long key; // key may be far away from the core kernel under KASLR }; static inline unsigned long jump_entry_code(const struct jump_entry *entry) { return (unsigned long)&entry->code + entry->code; } static inline unsigned long jump_entry_target(const struct jump_entry *entry) { return (unsigned long)&entry->target + entry->target; } static inline struct static_key *jump_entry_key(const struct jump_entry *entry) { long offset = entry->key & ~3L; return (struct static_key *)((unsigned long)&entry->key + offset); } #else static inline unsigned long jump_entry_code(const struct jump_entry *entry) { return entry->code; } static inline unsigned long jump_entry_target(const struct jump_entry *entry) { return entry->target; } static inline struct static_key *jump_entry_key(const struct jump_entry *entry) { return (struct static_key *)((unsigned long)entry->key & ~3UL); } #endif static inline bool jump_entry_is_branch(const struct jump_entry *entry) { return (unsigned long)entry->key & 1UL; } static inline bool jump_entry_is_init(const struct jump_entry *entry) { return (unsigned long)entry->key & 2UL; } static inline void jump_entry_set_init(struct jump_entry *entry) { entry->key |= 2; } #endif #endif #ifndef __ASSEMBLY__ enum jump_label_type { JUMP_LABEL_NOP = 0, JUMP_LABEL_JMP, }; struct module; #ifdef CONFIG_JUMP_LABEL #define JUMP_TYPE_FALSE 0UL #define JUMP_TYPE_TRUE 1UL #define JUMP_TYPE_LINKED 2UL #define JUMP_TYPE_MASK 3UL static __always_inline bool static_key_false(struct static_key *key) { return arch_static_branch(key, false); } static __always_inline bool static_key_true(struct static_key *key) { return !arch_static_branch(key, true); } extern struct jump_entry __start___jump_table[]; extern struct jump_entry __stop___jump_table[]; extern void jump_label_init(void); extern void jump_label_lock(void); extern void jump_label_unlock(void); extern void arch_jump_label_transform(struct jump_entry *entry, enum jump_label_type type); extern void arch_jump_label_transform_static(struct jump_entry *entry, enum jump_label_type type); extern bool arch_jump_label_transform_queue(struct jump_entry *entry, enum jump_label_type type); extern void arch_jump_label_transform_apply(void); extern int jump_label_text_reserved(void *start, void *end); extern void static_key_slow_inc(struct static_key *key); extern void static_key_slow_dec(struct static_key *key); extern void static_key_slow_inc_cpuslocked(struct static_key *key); extern void static_key_slow_dec_cpuslocked(struct static_key *key); extern void jump_label_apply_nops(struct module *mod); extern int static_key_count(struct static_key *key); extern void static_key_enable(struct static_key *key); extern void static_key_disable(struct static_key *key); extern void static_key_enable_cpuslocked(struct static_key *key); extern void static_key_disable_cpuslocked(struct static_key *key); /* * We should be using ATOMIC_INIT() for initializing .enabled, but * the inclusion of atomic.h is problematic for inclusion of jump_label.h * in 'low-level' headers. Thus, we are initializing .enabled with a * raw value, but have added a BUILD_BUG_ON() to catch any issues in * jump_label_init() see: kernel/jump_label.c. */ #define STATIC_KEY_INIT_TRUE \ { .enabled = { 1 }, \ { .entries = (void *)JUMP_TYPE_TRUE } } #define STATIC_KEY_INIT_FALSE \ { .enabled = { 0 }, \ { .entries = (void *)JUMP_TYPE_FALSE } } #else /* !CONFIG_JUMP_LABEL */ #include <linux/atomic.h> #include <linux/bug.h> static inline int static_key_count(struct static_key *key) { return atomic_read(&key->enabled); } static __always_inline void jump_label_init(void) { static_key_initialized = true; } static __always_inline bool static_key_false(struct static_key *key) { if (unlikely(static_key_count(key) > 0)) return true; return false; } static __always_inline bool static_key_true(struct static_key *key) { if (likely(static_key_count(key) > 0)) return true; return false; } static inline void static_key_slow_inc(struct static_key *key) { STATIC_KEY_CHECK_USE(key); atomic_inc(&key->enabled); } static inline void static_key_slow_dec(struct static_key *key) { STATIC_KEY_CHECK_USE(key); atomic_dec(&key->enabled); } #define static_key_slow_inc_cpuslocked(key) static_key_slow_inc(key) #define static_key_slow_dec_cpuslocked(key) static_key_slow_dec(key) static inline int jump_label_text_reserved(void *start, void *end) { return 0; } static inline void jump_label_lock(void) {} static inline void jump_label_unlock(void) {} static inline int jump_label_apply_nops(struct module *mod) { return 0; } static inline void static_key_enable(struct static_key *key) { STATIC_KEY_CHECK_USE(key); if (atomic_read(&key->enabled) != 0) { WARN_ON_ONCE(atomic_read(&key->enabled) != 1); return; } atomic_set(&key->enabled, 1); } static inline void static_key_disable(struct static_key *key) { STATIC_KEY_CHECK_USE(key); if (atomic_read(&key->enabled) != 1) { WARN_ON_ONCE(atomic_read(&key->enabled) != 0); return; } atomic_set(&key->enabled, 0); } #define static_key_enable_cpuslocked(k) static_key_enable((k)) #define static_key_disable_cpuslocked(k) static_key_disable((k)) #define STATIC_KEY_INIT_TRUE { .enabled = ATOMIC_INIT(1) } #define STATIC_KEY_INIT_FALSE { .enabled = ATOMIC_INIT(0) } #endif /* CONFIG_JUMP_LABEL */ #define STATIC_KEY_INIT STATIC_KEY_INIT_FALSE #define jump_label_enabled static_key_enabled /* -------------------------------------------------------------------------- */ /* * Two type wrappers around static_key, such that we can use compile time * type differentiation to emit the right code. * * All the below code is macros in order to play type games. */ struct static_key_true { struct static_key key; }; struct static_key_false { struct static_key key; }; #define STATIC_KEY_TRUE_INIT (struct static_key_true) { .key = STATIC_KEY_INIT_TRUE, } #define STATIC_KEY_FALSE_INIT (struct static_key_false){ .key = STATIC_KEY_INIT_FALSE, } #define DEFINE_STATIC_KEY_TRUE(name) \ struct static_key_true name = STATIC_KEY_TRUE_INIT #define DEFINE_STATIC_KEY_TRUE_RO(name) \ struct static_key_true name __ro_after_init = STATIC_KEY_TRUE_INIT #define DECLARE_STATIC_KEY_TRUE(name) \ extern struct static_key_true name #define DEFINE_STATIC_KEY_FALSE(name) \ struct static_key_false name = STATIC_KEY_FALSE_INIT #define DEFINE_STATIC_KEY_FALSE_RO(name) \ struct static_key_false name __ro_after_init = STATIC_KEY_FALSE_INIT #define DECLARE_STATIC_KEY_FALSE(name) \ extern struct static_key_false name #define DEFINE_STATIC_KEY_ARRAY_TRUE(name, count) \ struct static_key_true name[count] = { \ [0 ... (count) - 1] = STATIC_KEY_TRUE_INIT, \ } #define DEFINE_STATIC_KEY_ARRAY_FALSE(name, count) \ struct static_key_false name[count] = { \ [0 ... (count) - 1] = STATIC_KEY_FALSE_INIT, \ } extern bool ____wrong_branch_error(void); #define static_key_enabled(x) \ ({ \ if (!__builtin_types_compatible_p(typeof(*x), struct static_key) && \ !__builtin_types_compatible_p(typeof(*x), struct static_key_true) &&\ !__builtin_types_compatible_p(typeof(*x), struct static_key_false)) \ ____wrong_branch_error(); \ static_key_count((struct static_key *)x) > 0; \ }) #ifdef CONFIG_JUMP_LABEL /* * Combine the right initial value (type) with the right branch order * to generate the desired result. * * * type\branch| likely (1) | unlikely (0) * -----------+-----------------------+------------------ * | | * true (1) | ... | ... * | NOP | JMP L * | <br-stmts> | 1: ... * | L: ... | * | | * | | L: <br-stmts> * | | jmp 1b * | | * -----------+-----------------------+------------------ * | | * false (0) | ... | ... * | JMP L | NOP * | <br-stmts> | 1: ... * | L: ... | * | | * | | L: <br-stmts> * | | jmp 1b * | | * -----------+-----------------------+------------------ * * The initial value is encoded in the LSB of static_key::entries, * type: 0 = false, 1 = true. * * The branch type is encoded in the LSB of jump_entry::key, * branch: 0 = unlikely, 1 = likely. * * This gives the following logic table: * * enabled type branch instuction * -----------------------------+----------- * 0 0 0 | NOP * 0 0 1 | JMP * 0 1 0 | NOP * 0 1 1 | JMP * * 1 0 0 | JMP * 1 0 1 | NOP * 1 1 0 | JMP * 1 1 1 | NOP * * Which gives the following functions: * * dynamic: instruction = enabled ^ branch * static: instruction = type ^ branch * * See jump_label_type() / jump_label_init_type(). */ #define static_branch_likely(x) \ ({ \ bool branch; \ if (__builtin_types_compatible_p(typeof(*x), struct static_key_true)) \ branch = !arch_static_branch(&(x)->key, true); \ else if (__builtin_types_compatible_p(typeof(*x), struct static_key_false)) \ branch = !arch_static_branch_jump(&(x)->key, true); \ else \ branch = ____wrong_branch_error(); \ likely(branch); \ }) #define static_branch_unlikely(x) \ ({ \ bool branch; \ if (__builtin_types_compatible_p(typeof(*x), struct static_key_true)) \ branch = arch_static_branch_jump(&(x)->key, false); \ else if (__builtin_types_compatible_p(typeof(*x), struct static_key_false)) \ branch = arch_static_branch(&(x)->key, false); \ else \ branch = ____wrong_branch_error(); \ unlikely(branch); \ }) #else /* !CONFIG_JUMP_LABEL */ #define static_branch_likely(x) likely(static_key_enabled(&(x)->key)) #define static_branch_unlikely(x) unlikely(static_key_enabled(&(x)->key)) #endif /* CONFIG_JUMP_LABEL */ /* * Advanced usage; refcount, branch is enabled when: count != 0 */ #define static_branch_inc(x) static_key_slow_inc(&(x)->key) #define static_branch_dec(x) static_key_slow_dec(&(x)->key) #define static_branch_inc_cpuslocked(x) static_key_slow_inc_cpuslocked(&(x)->key) #define static_branch_dec_cpuslocked(x) static_key_slow_dec_cpuslocked(&(x)->key) /* * Normal usage; boolean enable/disable. */ #define static_branch_enable(x) static_key_enable(&(x)->key) #define static_branch_disable(x) static_key_disable(&(x)->key) #define static_branch_enable_cpuslocked(x) static_key_enable_cpuslocked(&(x)->key) #define static_branch_disable_cpuslocked(x) static_key_disable_cpuslocked(&(x)->key) #endif /* __ASSEMBLY__ */ #endif /* _LINUX_JUMP_LABEL_H */
1 1 1 1 1 1 1 1 1 1 1 1 1 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946 947 948 949 950 951 952 953 954 955 956 957 958 959 960 961 962 963 964 965 966 967 968 969 970 971 972 973 974 975 976 977 978 979 980 981 982 983 984 985 986 987 988 989 990 991 992 993 994 995 996 997 998 999 1000 1001 1002 1003 1004 1005 1006 1007 1008 1009 1010 1011 1012 1013 1014 1015 1016 1017 1018 1019 1020 1021 1022 1023 1024 1025 1026 1027 1028 1029 1030 1031 1032 1033 1034 1035 1036 1037 1038 1039 1040 1041 1042 1043 1044 1045 1046 1047 1048 1049 1050 1051 1052 1053 1054 1055 1056 1057 1058 1059 1060 1061 1062 1063 1064 1065 1066 1067 1068 1069 1070 1071 1072 1073 1074 1075 1076 1077 1078 1079 1080 1081 1082 1083 1084 1085 1086 1087 1088 1089 1090 1091 1092 1093 1094 1095 1096 1097 1098 1099 1100 1101 1102 1103 1104 1105 1106 1107 1108 1109 1110 1111 1112 1113 1114 1115 1116 1117 1118 1119 1120 1121 1122 1123 1124 1125 1126 1127 1128 1129 1130 1131 1132 1133 1134 1135 1136 1137 1138 1139 1140 1141 1142 1143 1144 1145 1146 1147 1148 1149 1150 1151 1152 1153 1154 1155 1156 1157 1158 1159 1160 1161 1162 1163 1164 1165 1166 1167 1168 1169 1170 1171 1172 1173 1174 1175 1176 1177 1178 1179 1180 1181 1182 1183 1184 1185 1186 1187 1188 1189 1190 1191 1192 1193 1194 1195 1196 1197 1198 1199 1200 1201 1202 1203 1204 1205 1206 1207 1208 1209 1210 1211 1212 1213 1214 1215 1216 1217 1218 1219 1220 1221 1222 1223 1224 1225 1226 1227 1228 1229 1230 1231 1232 1233 1234 1235 1236 1237 1238 1239 1240 1241 1242 1243 1244 1245 1246 1247 1248 1249 1250 1251 1252 1253 1254 1255 1256 1257 1258 1259 1260 1261 1262 1263 1264 1265 1266 1267 1268 1269 1270 1271 1272 1273 1274 1275 1276 1277 1278 1279 1280 1281 1282 1283 1284 1285 1286 1287 1288 1289 1290 1291 1292 1293 1294 1295 1296 1297 1298 1299 1300 1301 1302 1303 1304 1305 1306 1307 1308 1309 1310 1311 1312 1313 1314 1315 1316 1317 1318 1319 1320 1321 1322 1323 1324 1325 1326 1327 1328 1329 1330 1331 1332 1333 1334 1335 1336 1337 1338 1339 1340 1341 1342 1343 1344 1345 1346 1347 1348 1349 1350 1351 1352 1353 1354 1355 1356 1357 1358 1359 1360 1361 1362 1363 1364 1365 1366 1367 1368 1369 1370 1371 1372 1373 1374 1375 1376 1377 1378 1379 1380 1381 1382 1383 1384 1385 1386 1387 1388 1389 1390 1391 1392 1393 1394 1395 1396 1397 1398 1399 1400 1401 1402 1403 1404 1405 1406 1407 1408 1409 1410 1411 1412 1413 1414 1415 1416 1417 1418 1419 1420 1421 1422 1423 1424 1425 1426 1427 1428 1429 1430 1431 1432 1433 1434 1435 1436 1437 1438 1439 1440 1441 1442 1443 1444 1445 1446 1447 1448 1449 1450 1451 1452 1453 1454 1455 1456 1457 1458 1459 1460 1461 1462 1463 1464 1465 1466 1467 1468 1469 1470 1471 1472 1473 1474 1475 1476 1477 1478 1479 1480 1481 1482 1483 1484 1485 1486 1487 1488 1489 1490 1491 1492 1493 1494 1495 1496 1497 1498 1499 1500 1501 1502 1503 1504 1505 1506 1507 1508 1509 1510 1511 1512 1513 1514 1515 1516 1517 1518 1519 1520 1521 1522 1523 1524 1525 1526 1527 1528 1529 1530 1531 1532 1533 1534 1535 1536 1537 1538 1539 1540 1541 1542 1543 1544 1545 1546 1547 1548 1549 1550 1551 1552 1553 1554 1555 1556 1557 1558 1559 1560 1561 1562 1563 1564 1565 1566 1567 1568 1569 1570 1571 1572 1573 1574 1575 1576 1577 1578 1579 1580 1581 1582 1583 1584 1585 1586 1587 1588 1589 1590 1591 1592 1593 1594 1595 1596 1597 1598 1599 1600 1601 1602 1603 1604 1605 1606 1607 1608 1609 1610 1611 1612 1613 1614 1615 1616 1617 1618 1619 1620 1621 1622 1623 1624 1625 1626 1627 1628 1629 1630 1631 1632 1633 1634 1635 1636 1637 1638 1639 1640 1641 1642 1643 1644 1645 1646 1647 1648 1649 1650 1651 1652 1653 1654 1655 1656 1657 1658 1659 1660 1661 1662 1663 1664 1665 1666 1667 1668 1669 1670 1671 1672 1673 1674 1675 1676 1677 1678 1679 1680 1681 1682 1683 1684 1685 1686 1687 1688 1689 1690 1691 1692 1693 1694 1695 1696 1697 1698 1699 1700 1701 1702 1703 1704 1705 1706 1707 1708 1709 1710 1711 1712 1713 1714 1715 1716 1717 1718 1719 1720 1721 1722 1723 1724 1725 1726 1727 1728 1729 1730 1731 1732 1733 1734 1735 1736 1737 1738 1739 1740 1741 1742 1743 1744 1745 1746 1747 1748 1749 1750 1751 1752 1753 1754 1755 1756 1757 1758 1759 1760 1761 1762 1763 1764 1765 1766 1767 1768 1769 1770 1771 1772 1773 1774 1775 1776 1777 1778 1779 1780 1781 1782 1783 1784 1785 1786 1787 1788 1789 1790 1791 1792 1793 1794 1795 1796 1797 1798 1799 1800 1801 1802 1803 1804 1805 1806 1807 1808 1809 1810 1811 1812 1813 1814 1815 1816 1817 1818 1819 1820 1821 1822 1823 1824 1825 1826 1827 1828 1829 1830 1831 1832 1833 1834 1835 1836 1837 1838 1839 1840 1841 1842 1843 1844 1845 1846 1847 1848 1849 1850 1851 1852 1853 1854 1855 1856 1857 1858 1859 1860 1861 1862 1863 1864 1865 1866 1867 1868 1869 1870 1871 1872 1873 1874 1875 1876 1877 1878 1879 1880 1881 1882 1883 1884 1885 1886 1887 1888 1889 1890 1891 1892 1893 1894 1895 1896 1897 1898 1899 1900 1901 1902 1903 1904 1905 1906 1907 1908 1909 1910 1911 1912 1913 1914 1915 1916 1917 1918 1919 1920 1921 1922 1923 1924 1925 1926 1927 1928 1929 1930 1931 1932 1933 1934 1935 1936 1937 1938 1939 1940 1941 1942 1943 1944 1945 1946 1947 1948 1949 1950 1951 1952 1953 1954 1955 1956 1957 1958 1959 1960 1961 1962 1963 1964 1965 1966 1967 1968 1969 1970 1971 1972 1973 1974 1975 1976 1977 1978 1979 1980 1981 1982 1983 1984 1985 1986 1987 1988 1989 1990 1991 1992 1993 /* * mm/rmap.c - physical to virtual reverse mappings * * Copyright 2001, Rik van Riel <riel@conectiva.com.br> * Released under the General Public License (GPL). * * Simple, low overhead reverse mapping scheme. * Please try to keep this thing as modular as possible. * * Provides methods for unmapping each kind of mapped page: * the anon methods track anonymous pages, and * the file methods track pages belonging to an inode. * * Original design by Rik van Riel <riel@conectiva.com.br> 2001 * File methods by Dave McCracken <dmccr@us.ibm.com> 2003, 2004 * Anonymous methods by Andrea Arcangeli <andrea@suse.de> 2004 * Contributions by Hugh Dickins 2003, 2004 */ /* * Lock ordering in mm: * * inode->i_mutex (while writing or truncating, not reading or faulting) * mm->mmap_lock * page->flags PG_locked (lock_page) * (see huegtlbfs below) * hugetlbfs_i_mmap_rwsem_key (in huge_pmd_share) * mapping->i_mmap_rwsem * hugetlb_fault_mutex (hugetlbfs specific page fault mutex) * anon_vma->rwsem * mm->page_table_lock or pte_lock * pgdat->lru_lock (in mark_page_accessed, isolate_lru_page) * swap_lock (in swap_duplicate, swap_info_get) * mmlist_lock (in mmput, drain_mmlist and others) * mapping->private_lock (in __set_page_dirty_buffers) * mem_cgroup_{begin,end}_page_stat (memcg->move_lock) * i_pages lock (widely used) * inode->i_lock (in set_page_dirty's __mark_inode_dirty) * bdi.wb->list_lock (in set_page_dirty's __mark_inode_dirty) * sb_lock (within inode_lock in fs/fs-writeback.c) * i_pages lock (widely used, in set_page_dirty, * in arch-dependent flush_dcache_mmap_lock, * within bdi.wb->list_lock in __sync_single_inode) * * anon_vma->rwsem,mapping->i_mutex (memory_failure, collect_procs_anon) * ->tasklist_lock * pte map lock * * * hugetlbfs PageHuge() pages take locks in this order: * mapping->i_mmap_rwsem * hugetlb_fault_mutex (hugetlbfs specific page fault mutex) * page->flags PG_locked (lock_page) */ #include <linux/mm.h> #include <linux/sched/mm.h> #include <linux/sched/task.h> #include <linux/pagemap.h> #include <linux/swap.h> #include <linux/swapops.h> #include <linux/slab.h> #include <linux/init.h> #include <linux/ksm.h> #include <linux/rmap.h> #include <linux/rcupdate.h> #include <linux/export.h> #include <linux/memcontrol.h> #include <linux/mmu_notifier.h> #include <linux/migrate.h> #include <linux/hugetlb.h> #include <linux/huge_mm.h> #include <linux/backing-dev.h> #include <linux/page_idle.h> #include <linux/memremap.h> #include <linux/userfaultfd_k.h> #include <asm/tlbflush.h> #include <trace/events/tlb.h> #include "internal.h" static struct kmem_cache *anon_vma_cachep; static struct kmem_cache *anon_vma_chain_cachep; static inline struct anon_vma *anon_vma_alloc(void) { struct anon_vma *anon_vma; anon_vma = kmem_cache_alloc(anon_vma_cachep, GFP_KERNEL); if (anon_vma) { atomic_set(&anon_vma->refcount, 1); anon_vma->degree = 1; /* Reference for first vma */ anon_vma->parent = anon_vma; /* * Initialise the anon_vma root to point to itself. If called * from fork, the root will be reset to the parents anon_vma. */ anon_vma->root = anon_vma; } return anon_vma; } static inline void anon_vma_free(struct anon_vma *anon_vma) { VM_BUG_ON(atomic_read(&anon_vma->refcount)); /* * Synchronize against page_lock_anon_vma_read() such that * we can safely hold the lock without the anon_vma getting * freed. * * Relies on the full mb implied by the atomic_dec_and_test() from * put_anon_vma() against the acquire barrier implied by * down_read_trylock() from page_lock_anon_vma_read(). This orders: * * page_lock_anon_vma_read() VS put_anon_vma() * down_read_trylock() atomic_dec_and_test() * LOCK MB * atomic_read() rwsem_is_locked() * * LOCK should suffice since the actual taking of the lock must * happen _before_ what follows. */ might_sleep(); if (rwsem_is_locked(&anon_vma->root->rwsem)) { anon_vma_lock_write(anon_vma); anon_vma_unlock_write(anon_vma); } kmem_cache_free(anon_vma_cachep, anon_vma); } static inline struct anon_vma_chain *anon_vma_chain_alloc(gfp_t gfp) { return kmem_cache_alloc(anon_vma_chain_cachep, gfp); } static void anon_vma_chain_free(struct anon_vma_chain *anon_vma_chain) { kmem_cache_free(anon_vma_chain_cachep, anon_vma_chain); } static void anon_vma_chain_link(struct vm_area_struct *vma, struct anon_vma_chain *avc, struct anon_vma *anon_vma) { avc->vma = vma; avc->anon_vma = anon_vma; list_add(&avc->same_vma, &vma->anon_vma_chain); anon_vma_interval_tree_insert(avc, &anon_vma->rb_root); } /** * __anon_vma_prepare - attach an anon_vma to a memory region * @vma: the memory region in question * * This makes sure the memory mapping described by 'vma' has * an 'anon_vma' attached to it, so that we can associate the * anonymous pages mapped into it with that anon_vma. * * The common case will be that we already have one, which * is handled inline by anon_vma_prepare(). But if * not we either need to find an adjacent mapping that we * can re-use the anon_vma from (very common when the only * reason for splitting a vma has been mprotect()), or we * allocate a new one. * * Anon-vma allocations are very subtle, because we may have * optimistically looked up an anon_vma in page_lock_anon_vma_read() * and that may actually touch the spinlock even in the newly * allocated vma (it depends on RCU to make sure that the * anon_vma isn't actually destroyed). * * As a result, we need to do proper anon_vma locking even * for the new allocation. At the same time, we do not want * to do any locking for the common case of already having * an anon_vma. * * This must be called with the mmap_lock held for reading. */ int __anon_vma_prepare(struct vm_area_struct *vma) { struct mm_struct *mm = vma->vm_mm; struct anon_vma *anon_vma, *allocated; struct anon_vma_chain *avc; might_sleep(); avc = anon_vma_chain_alloc(GFP_KERNEL); if (!avc) goto out_enomem; anon_vma = find_mergeable_anon_vma(vma); allocated = NULL; if (!anon_vma) { anon_vma = anon_vma_alloc(); if (unlikely(!anon_vma)) goto out_enomem_free_avc; allocated = anon_vma; } anon_vma_lock_write(anon_vma); /* page_table_lock to protect against threads */ spin_lock(&mm->page_table_lock); if (likely(!vma->anon_vma)) { vma->anon_vma = anon_vma; anon_vma_chain_link(vma, avc, anon_vma); /* vma reference or self-parent link for new root */ anon_vma->degree++; allocated = NULL; avc = NULL; } spin_unlock(&mm->page_table_lock); anon_vma_unlock_write(anon_vma); if (unlikely(allocated)) put_anon_vma(allocated); if (unlikely(avc)) anon_vma_chain_free(avc); return 0; out_enomem_free_avc: anon_vma_chain_free(avc); out_enomem: return -ENOMEM; } /* * This is a useful helper function for locking the anon_vma root as * we traverse the vma->anon_vma_chain, looping over anon_vma's that * have the same vma. * * Such anon_vma's should have the same root, so you'd expect to see * just a single mutex_lock for the whole traversal. */ static inline struct anon_vma *lock_anon_vma_root(struct anon_vma *root, struct anon_vma *anon_vma) { struct anon_vma *new_root = anon_vma->root; if (new_root != root) { if (WARN_ON_ONCE(root)) up_write(&root->rwsem); root = new_root; down_write(&root->rwsem); } return root; } static inline void unlock_anon_vma_root(struct anon_vma *root) { if (root) up_write(&root->rwsem); } /* * Attach the anon_vmas from src to dst. * Returns 0 on success, -ENOMEM on failure. * * anon_vma_clone() is called by __vma_split(), __split_vma(), copy_vma() and * anon_vma_fork(). The first three want an exact copy of src, while the last * one, anon_vma_fork(), may try to reuse an existing anon_vma to prevent * endless growth of anon_vma. Since dst->anon_vma is set to NULL before call, * we can identify this case by checking (!dst->anon_vma && src->anon_vma). * * If (!dst->anon_vma && src->anon_vma) is true, this function tries to find * and reuse existing anon_vma which has no vmas and only one child anon_vma. * This prevents degradation of anon_vma hierarchy to endless linear chain in * case of constantly forking task. On the other hand, an anon_vma with more * than one child isn't reused even if there was no alive vma, thus rmap * walker has a good chance of avoiding scanning the whole hierarchy when it * searches where page is mapped. */ int anon_vma_clone(struct vm_area_struct *dst, struct vm_area_struct *src) { struct anon_vma_chain *avc, *pavc; struct anon_vma *root = NULL; list_for_each_entry_reverse(pavc, &src->anon_vma_chain, same_vma) { struct anon_vma *anon_vma; avc = anon_vma_chain_alloc(GFP_NOWAIT | __GFP_NOWARN); if (unlikely(!avc)) { unlock_anon_vma_root(root); root = NULL; avc = anon_vma_chain_alloc(GFP_KERNEL); if (!avc) goto enomem_failure; } anon_vma = pavc->anon_vma; root = lock_anon_vma_root(root, anon_vma); anon_vma_chain_link(dst, avc, anon_vma); /* * Reuse existing anon_vma if its degree lower than two, * that means it has no vma and only one anon_vma child. * * Do not chose parent anon_vma, otherwise first child * will always reuse it. Root anon_vma is never reused: * it has self-parent reference and at least one child. */ if (!dst->anon_vma && src->anon_vma && anon_vma != src->anon_vma && anon_vma->degree < 2) dst->anon_vma = anon_vma; } if (dst->anon_vma) dst->anon_vma->degree++; unlock_anon_vma_root(root); return 0; enomem_failure: /* * dst->anon_vma is dropped here otherwise its degree can be incorrectly * decremented in unlink_anon_vmas(). * We can safely do this because callers of anon_vma_clone() don't care * about dst->anon_vma if anon_vma_clone() failed. */ dst->anon_vma = NULL; unlink_anon_vmas(dst); return -ENOMEM; } /* * Attach vma to its own anon_vma, as well as to the anon_vmas that * the corresponding VMA in the parent process is attached to. * Returns 0 on success, non-zero on failure. */ int anon_vma_fork(struct vm_area_struct *vma, struct vm_area_struct *pvma) { struct anon_vma_chain *avc; struct anon_vma *anon_vma; int error; /* Don't bother if the parent process has no anon_vma here. */ if (!pvma->anon_vma) return 0; /* Drop inherited anon_vma, we'll reuse existing or allocate new. */ vma->anon_vma = NULL; /* * First, attach the new VMA to the parent VMA's anon_vmas, * so rmap can find non-COWed pages in child processes. */ error = anon_vma_clone(vma, pvma); if (error) return error; /* An existing anon_vma has been reused, all done then. */ if (vma->anon_vma) return 0; /* Then add our own anon_vma. */ anon_vma = anon_vma_alloc(); if (!anon_vma) goto out_error; avc = anon_vma_chain_alloc(GFP_KERNEL); if (!avc) goto out_error_free_anon_vma; /* * The root anon_vma's spinlock is the lock actually used when we * lock any of the anon_vmas in this anon_vma tree. */ anon_vma->root = pvma->anon_vma->root; anon_vma->parent = pvma->anon_vma; /* * With refcounts, an anon_vma can stay around longer than the * process it belongs to. The root anon_vma needs to be pinned until * this anon_vma is freed, because the lock lives in the root. */ get_anon_vma(anon_vma->root); /* Mark this anon_vma as the one where our new (COWed) pages go. */ vma->anon_vma = anon_vma; anon_vma_lock_write(anon_vma); anon_vma_chain_link(vma, avc, anon_vma); anon_vma->parent->degree++; anon_vma_unlock_write(anon_vma); return 0; out_error_free_anon_vma: put_anon_vma(anon_vma); out_error: unlink_anon_vmas(vma); return -ENOMEM; } void unlink_anon_vmas(struct vm_area_struct *vma) { struct anon_vma_chain *avc, *next; struct anon_vma *root = NULL; /* * Unlink each anon_vma chained to the VMA. This list is ordered * from newest to oldest, ensuring the root anon_vma gets freed last. */ list_for_each_entry_safe(avc, next, &vma->anon_vma_chain, same_vma) { struct anon_vma *anon_vma = avc->anon_vma; root = lock_anon_vma_root(root, anon_vma); anon_vma_interval_tree_remove(avc, &anon_vma->rb_root); /* * Leave empty anon_vmas on the list - we'll need * to free them outside the lock. */ if (RB_EMPTY_ROOT(&anon_vma->rb_root.rb_root)) { anon_vma->parent->degree--; continue; } list_del(&avc->same_vma); anon_vma_chain_free(avc); } if (vma->anon_vma) vma->anon_vma->degree--; unlock_anon_vma_root(root); /* * Iterate the list once more, it now only contains empty and unlinked * anon_vmas, destroy them. Could not do before due to __put_anon_vma() * needing to write-acquire the anon_vma->root->rwsem. */ list_for_each_entry_safe(avc, next, &vma->anon_vma_chain, same_vma) { struct anon_vma *anon_vma = avc->anon_vma; VM_WARN_ON(anon_vma->degree); put_anon_vma(anon_vma); list_del(&avc->same_vma); anon_vma_chain_free(avc); } } static void anon_vma_ctor(void *data) { struct anon_vma *anon_vma = data; init_rwsem(&anon_vma->rwsem); atomic_set(&anon_vma->refcount, 0); anon_vma->rb_root = RB_ROOT_CACHED; } void __init anon_vma_init(void) { anon_vma_cachep = kmem_cache_create("anon_vma", sizeof(struct anon_vma), 0, SLAB_TYPESAFE_BY_RCU|SLAB_PANIC|SLAB_ACCOUNT, anon_vma_ctor); anon_vma_chain_cachep = KMEM_CACHE(anon_vma_chain, SLAB_PANIC|SLAB_ACCOUNT); } /* * Getting a lock on a stable anon_vma from a page off the LRU is tricky! * * Since there is no serialization what so ever against page_remove_rmap() * the best this function can do is return a locked anon_vma that might * have been relevant to this page. * * The page might have been remapped to a different anon_vma or the anon_vma * returned may already be freed (and even reused). * * In case it was remapped to a different anon_vma, the new anon_vma will be a * child of the old anon_vma, and the anon_vma lifetime rules will therefore * ensure that any anon_vma obtained from the page will still be valid for as * long as we observe page_mapped() [ hence all those page_mapped() tests ]. * * All users of this function must be very careful when walking the anon_vma * chain and verify that the page in question is indeed mapped in it * [ something equivalent to page_mapped_in_vma() ]. * * Since anon_vma's slab is SLAB_TYPESAFE_BY_RCU and we know from * page_remove_rmap() that the anon_vma pointer from page->mapping is valid * if there is a mapcount, we can dereference the anon_vma after observing * those. */ struct anon_vma *page_get_anon_vma(struct page *page) { struct anon_vma *anon_vma = NULL; unsigned long anon_mapping; rcu_read_lock(); anon_mapping = (unsigned long)READ_ONCE(page->mapping); if ((anon_mapping & PAGE_MAPPING_FLAGS) != PAGE_MAPPING_ANON) goto out; if (!page_mapped(page)) goto out; anon_vma = (struct anon_vma *) (anon_mapping - PAGE_MAPPING_ANON); if (!atomic_inc_not_zero(&anon_vma->refcount)) { anon_vma = NULL; goto out; } /* * If this page is still mapped, then its anon_vma cannot have been * freed. But if it has been unmapped, we have no security against the * anon_vma structure being freed and reused (for another anon_vma: * SLAB_TYPESAFE_BY_RCU guarantees that - so the atomic_inc_not_zero() * above cannot corrupt). */ if (!page_mapped(page)) { rcu_read_unlock(); put_anon_vma(anon_vma); return NULL; } out: rcu_read_unlock(); return anon_vma; } /* * Similar to page_get_anon_vma() except it locks the anon_vma. * * Its a little more complex as it tries to keep the fast path to a single * atomic op -- the trylock. If we fail the trylock, we fall back to getting a * reference like with page_get_anon_vma() and then block on the mutex. */ struct anon_vma *page_lock_anon_vma_read(struct page *page) { struct anon_vma *anon_vma = NULL; struct anon_vma *root_anon_vma; unsigned long anon_mapping; rcu_read_lock(); anon_mapping = (unsigned long)READ_ONCE(page->mapping); if ((anon_mapping & PAGE_MAPPING_FLAGS) != PAGE_MAPPING_ANON) goto out; if (!page_mapped(page)) goto out; anon_vma = (struct anon_vma *) (anon_mapping - PAGE_MAPPING_ANON); root_anon_vma = READ_ONCE(anon_vma->root); if (down_read_trylock(&root_anon_vma->rwsem)) { /* * If the page is still mapped, then this anon_vma is still * its anon_vma, and holding the mutex ensures that it will * not go away, see anon_vma_free(). */ if (!page_mapped(page)) { up_read(&root_anon_vma->rwsem); anon_vma = NULL; } goto out; } /* trylock failed, we got to sleep */ if (!atomic_inc_not_zero(&anon_vma->refcount)) { anon_vma = NULL; goto out; } if (!page_mapped(page)) { rcu_read_unlock(); put_anon_vma(anon_vma); return NULL; } /* we pinned the anon_vma, its safe to sleep */ rcu_read_unlock(); anon_vma_lock_read(anon_vma); if (atomic_dec_and_test(&anon_vma->refcount)) { /* * Oops, we held the last refcount, release the lock * and bail -- can't simply use put_anon_vma() because * we'll deadlock on the anon_vma_lock_write() recursion. */ anon_vma_unlock_read(anon_vma); __put_anon_vma(anon_vma); anon_vma = NULL; } return anon_vma; out: rcu_read_unlock(); return anon_vma; } void page_unlock_anon_vma_read(struct anon_vma *anon_vma) { anon_vma_unlock_read(anon_vma); } #ifdef CONFIG_ARCH_WANT_BATCHED_UNMAP_TLB_FLUSH /* * Flush TLB entries for recently unmapped pages from remote CPUs. It is * important if a PTE was dirty when it was unmapped that it's flushed * before any IO is initiated on the page to prevent lost writes. Similarly, * it must be flushed before freeing to prevent data leakage. */ void try_to_unmap_flush(void) { struct tlbflush_unmap_batch *tlb_ubc = &current->tlb_ubc; if (!tlb_ubc->flush_required) return; arch_tlbbatch_flush(&tlb_ubc->arch); tlb_ubc->flush_required = false; tlb_ubc->writable = false; } /* Flush iff there are potentially writable TLB entries that can race with IO */ void try_to_unmap_flush_dirty(void) { struct tlbflush_unmap_batch *tlb_ubc = &current->tlb_ubc; if (tlb_ubc->writable) try_to_unmap_flush(); } static void set_tlb_ubc_flush_pending(struct mm_struct *mm, bool writable) { struct tlbflush_unmap_batch *tlb_ubc = &current->tlb_ubc; arch_tlbbatch_add_mm(&tlb_ubc->arch, mm); tlb_ubc->flush_required = true; /* * Ensure compiler does not re-order the setting of tlb_flush_batched * before the PTE is cleared. */ barrier(); mm->tlb_flush_batched = true; /* * If the PTE was dirty then it's best to assume it's writable. The * caller must use try_to_unmap_flush_dirty() or try_to_unmap_flush() * before the page is queued for IO. */ if (writable) tlb_ubc->writable = true; } /* * Returns true if the TLB flush should be deferred to the end of a batch of * unmap operations to reduce IPIs. */ static bool should_defer_flush(struct mm_struct *mm, enum ttu_flags flags) { bool should_defer = false; if (!(flags & TTU_BATCH_FLUSH)) return false; /* If remote CPUs need to be flushed then defer batch the flush */ if (cpumask_any_but(mm_cpumask(mm), get_cpu()) < nr_cpu_ids) should_defer = true; put_cpu(); return should_defer; } /* * Reclaim unmaps pages under the PTL but do not flush the TLB prior to * releasing the PTL if TLB flushes are batched. It's possible for a parallel * operation such as mprotect or munmap to race between reclaim unmapping * the page and flushing the page. If this race occurs, it potentially allows * access to data via a stale TLB entry. Tracking all mm's that have TLB * batching in flight would be expensive during reclaim so instead track * whether TLB batching occurred in the past and if so then do a flush here * if required. This will cost one additional flush per reclaim cycle paid * by the first operation at risk such as mprotect and mumap. * * This must be called under the PTL so that an access to tlb_flush_batched * that is potentially a "reclaim vs mprotect/munmap/etc" race will synchronise * via the PTL. */ void flush_tlb_batched_pending(struct mm_struct *mm) { if (data_race(mm->tlb_flush_batched)) { flush_tlb_mm(mm); /* * Do not allow the compiler to re-order the clearing of * tlb_flush_batched before the tlb is flushed. */ barrier(); mm->tlb_flush_batched = false; } } #else static void set_tlb_ubc_flush_pending(struct mm_struct *mm, bool writable) { } static bool should_defer_flush(struct mm_struct *mm, enum ttu_flags flags) { return false; } #endif /* CONFIG_ARCH_WANT_BATCHED_UNMAP_TLB_FLUSH */ /* * At what user virtual address is page expected in vma? * Caller should check the page is actually part of the vma. */ unsigned long page_address_in_vma(struct page *page, struct vm_area_struct *vma) { if (PageAnon(page)) { struct anon_vma *page__anon_vma = page_anon_vma(page); /* * Note: swapoff's unuse_vma() is more efficient with this * check, and needs it to match anon_vma when KSM is active. */ if (!vma->anon_vma || !page__anon_vma || vma->anon_vma->root != page__anon_vma->root) return -EFAULT; } else if (!vma->vm_file) { return -EFAULT; } else if (vma->vm_file->f_mapping != compound_head(page)->mapping) { return -EFAULT; } return vma_address(page, vma); } pmd_t *mm_find_pmd(struct mm_struct *mm, unsigned long address) { pgd_t *pgd; p4d_t *p4d; pud_t *pud; pmd_t *pmd = NULL; pmd_t pmde; pgd = pgd_offset(mm, address); if (!pgd_present(*pgd)) goto out; p4d = p4d_offset(pgd, address); if (!p4d_present(*p4d)) goto out; pud = pud_offset(p4d, address); if (!pud_present(*pud)) goto out; pmd = pmd_offset(pud, address); /* * Some THP functions use the sequence pmdp_huge_clear_flush(), set_pmd_at() * without holding anon_vma lock for write. So when looking for a * genuine pmde (in which to find pte), test present and !THP together. */ pmde = *pmd; barrier(); if (!pmd_present(pmde) || pmd_trans_huge(pmde)) pmd = NULL; out: return pmd; } struct page_referenced_arg { int mapcount; int referenced; unsigned long vm_flags; struct mem_cgroup *memcg; }; /* * arg: page_referenced_arg will be passed */ static bool page_referenced_one(struct page *page, struct vm_area_struct *vma, unsigned long address, void *arg) { struct page_referenced_arg *pra = arg; struct page_vma_mapped_walk pvmw = { .page = page, .vma = vma, .address = address, }; int referenced = 0; while (page_vma_mapped_walk(&pvmw)) { address = pvmw.address; if (vma->vm_flags & VM_LOCKED) { page_vma_mapped_walk_done(&pvmw); pra->vm_flags |= VM_LOCKED; return false; /* To break the loop */ } if (pvmw.pte) { if (ptep_clear_flush_young_notify(vma, address, pvmw.pte)) { /* * Don't treat a reference through * a sequentially read mapping as such. * If the page has been used in another mapping, * we will catch it; if this other mapping is * already gone, the unmap path will have set * PG_referenced or activated the page. */ if (likely(!(vma->vm_flags & VM_SEQ_READ))) referenced++; } } else if (IS_ENABLED(CONFIG_TRANSPARENT_HUGEPAGE)) { if (pmdp_clear_flush_young_notify(vma, address, pvmw.pmd)) referenced++; } else { /* unexpected pmd-mapped page? */ WARN_ON_ONCE(1); } pra->mapcount--; } if (referenced) clear_page_idle(page); if (test_and_clear_page_young(page)) referenced++; if (referenced) { pra->referenced++; pra->vm_flags |= vma->vm_flags; } if (!pra->mapcount) return false; /* To break the loop */ return true; } static bool invalid_page_referenced_vma(struct vm_area_struct *vma, void *arg) { struct page_referenced_arg *pra = arg; struct mem_cgroup *memcg = pra->memcg; if (!mm_match_cgroup(vma->vm_mm, memcg)) return true; return false; } /** * page_referenced - test if the page was referenced * @page: the page to test * @is_locked: caller holds lock on the page * @memcg: target memory cgroup * @vm_flags: collect encountered vma->vm_flags who actually referenced the page * * Quick test_and_clear_referenced for all mappings to a page, * returns the number of ptes which referenced the page. */ int page_referenced(struct page *page, int is_locked, struct mem_cgroup *memcg, unsigned long *vm_flags) { int we_locked = 0; struct page_referenced_arg pra = { .mapcount = total_mapcount(page), .memcg = memcg, }; struct rmap_walk_control rwc = { .rmap_one = page_referenced_one, .arg = (void *)&pra, .anon_lock = page_lock_anon_vma_read, }; *vm_flags = 0; if (!pra.mapcount) return 0; if (!page_rmapping(page)) return 0; if (!is_locked && (!PageAnon(page) || PageKsm(page))) { we_locked = trylock_page(page); if (!we_locked) return 1; } /* * If we are reclaiming on behalf of a cgroup, skip * counting on behalf of references from different * cgroups */ if (memcg) { rwc.invalid_vma = invalid_page_referenced_vma; } rmap_walk(page, &rwc); *vm_flags = pra.vm_flags; if (we_locked) unlock_page(page); return pra.referenced; } static bool page_mkclean_one(struct page *page, struct vm_area_struct *vma, unsigned long address, void *arg) { struct page_vma_mapped_walk pvmw = { .page = page, .vma = vma, .address = address, .flags = PVMW_SYNC, }; struct mmu_notifier_range range; int *cleaned = arg; /* * We have to assume the worse case ie pmd for invalidation. Note that * the page can not be free from this function. */ mmu_notifier_range_init(&range, MMU_NOTIFY_PROTECTION_PAGE, 0, vma, vma->vm_mm, address, vma_address_end(page, vma)); mmu_notifier_invalidate_range_start(&range); while (page_vma_mapped_walk(&pvmw)) { int ret = 0; address = pvmw.address; if (pvmw.pte) { pte_t entry; pte_t *pte = pvmw.pte; if (!pte_dirty(*pte) && !pte_write(*pte)) continue; flush_cache_page(vma, address, pte_pfn(*pte)); entry = ptep_clear_flush(vma, address, pte); entry = pte_wrprotect(entry); entry = pte_mkclean(entry); set_pte_at(vma->vm_mm, address, pte, entry); ret = 1; } else { #ifdef CONFIG_TRANSPARENT_HUGEPAGE pmd_t *pmd = pvmw.pmd; pmd_t entry; if (!pmd_dirty(*pmd) && !pmd_write(*pmd)) continue; flush_cache_page(vma, address, page_to_pfn(page)); entry = pmdp_invalidate(vma, address, pmd); entry = pmd_wrprotect(entry); entry = pmd_mkclean(entry); set_pmd_at(vma->vm_mm, address, pmd, entry); ret = 1; #else /* unexpected pmd-mapped page? */ WARN_ON_ONCE(1); #endif } /* * No need to call mmu_notifier_invalidate_range() as we are * downgrading page table protection not changing it to point * to a new page. * * See Documentation/vm/mmu_notifier.rst */ if (ret) (*cleaned)++; } mmu_notifier_invalidate_range_end(&range); return true; } static bool invalid_mkclean_vma(struct vm_area_struct *vma, void *arg) { if (vma->vm_flags & VM_SHARED) return false; return true; } int page_mkclean(struct page *page) { int cleaned = 0; struct address_space *mapping; struct rmap_walk_control rwc = { .arg = (void *)&cleaned, .rmap_one = page_mkclean_one, .invalid_vma = invalid_mkclean_vma, }; BUG_ON(!PageLocked(page)); if (!page_mapped(page)) return 0; mapping = page_mapping(page); if (!mapping) return 0; rmap_walk(page, &rwc); return cleaned; } EXPORT_SYMBOL_GPL(page_mkclean); /** * page_move_anon_rmap - move a page to our anon_vma * @page: the page to move to our anon_vma * @vma: the vma the page belongs to * * When a page belongs exclusively to one process after a COW event, * that page can be moved into the anon_vma that belongs to just that * process, so the rmap code will not search the parent or sibling * processes. */ void page_move_anon_rmap(struct page *page, struct vm_area_struct *vma) { struct anon_vma *anon_vma = vma->anon_vma; page = compound_head(page); VM_BUG_ON_PAGE(!PageLocked(page), page); VM_BUG_ON_VMA(!anon_vma, vma); anon_vma = (void *) anon_vma + PAGE_MAPPING_ANON; /* * Ensure that anon_vma and the PAGE_MAPPING_ANON bit are written * simultaneously, so a concurrent reader (eg page_referenced()'s * PageAnon()) will not see one without the other. */ WRITE_ONCE(page->mapping, (struct address_space *) anon_vma); } /** * __page_set_anon_rmap - set up new anonymous rmap * @page: Page or Hugepage to add to rmap * @vma: VM area to add page to. * @address: User virtual address of the mapping * @exclusive: the page is exclusively owned by the current process */ static void __page_set_anon_rmap(struct page *page, struct vm_area_struct *vma, unsigned long address, int exclusive) { struct anon_vma *anon_vma = vma->anon_vma; BUG_ON(!anon_vma); if (PageAnon(page)) return; /* * If the page isn't exclusively mapped into this vma, * we must use the _oldest_ possible anon_vma for the * page mapping! */ if (!exclusive) anon_vma = anon_vma->root; anon_vma = (void *) anon_vma + PAGE_MAPPING_ANON; page->mapping = (struct address_space *) anon_vma; page->index = linear_page_index(vma, address); } /** * __page_check_anon_rmap - sanity check anonymous rmap addition * @page: the page to add the mapping to * @vma: the vm area in which the mapping is added * @address: the user virtual address mapped */ static void __page_check_anon_rmap(struct page *page, struct vm_area_struct *vma, unsigned long address) { /* * The page's anon-rmap details (mapping and index) are guaranteed to * be set up correctly at this point. * * We have exclusion against page_add_anon_rmap because the caller * always holds the page locked, except if called from page_dup_rmap, * in which case the page is already known to be setup. * * We have exclusion against page_add_new_anon_rmap because those pages * are initially only visible via the pagetables, and the pte is locked * over the call to page_add_new_anon_rmap. */ VM_BUG_ON_PAGE(page_anon_vma(page)->root != vma->anon_vma->root, page); VM_BUG_ON_PAGE(page_to_pgoff(page) != linear_page_index(vma, address), page); } /** * page_add_anon_rmap - add pte mapping to an anonymous page * @page: the page to add the mapping to * @vma: the vm area in which the mapping is added * @address: the user virtual address mapped * @compound: charge the page as compound or small page * * The caller needs to hold the pte lock, and the page must be locked in * the anon_vma case: to serialize mapping,index checking after setting, * and to ensure that PageAnon is not being upgraded racily to PageKsm * (but PageKsm is never downgraded to PageAnon). */ void page_add_anon_rmap(struct page *page, struct vm_area_struct *vma, unsigned long address, bool compound) { do_page_add_anon_rmap(page, vma, address, compound ? RMAP_COMPOUND : 0); } /* * Special version of the above for do_swap_page, which often runs * into pages that are exclusively owned by the current process. * Everybody else should continue to use page_add_anon_rmap above. */ void do_page_add_anon_rmap(struct page *page, struct vm_area_struct *vma, unsigned long address, int flags) { bool compound = flags & RMAP_COMPOUND; bool first; if (unlikely(PageKsm(page))) lock_page_memcg(page); else VM_BUG_ON_PAGE(!PageLocked(page), page); if (compound) { atomic_t *mapcount; VM_BUG_ON_PAGE(!PageLocked(page), page); VM_BUG_ON_PAGE(!PageTransHuge(page), page); mapcount = compound_mapcount_ptr(page); first = atomic_inc_and_test(mapcount); } else { first = atomic_inc_and_test(&page->_mapcount); } if (first) { int nr = compound ? thp_nr_pages(page) : 1; /* * We use the irq-unsafe __{inc|mod}_zone_page_stat because * these counters are not modified in interrupt context, and * pte lock(a spinlock) is held, which implies preemption * disabled. */ if (compound) __inc_lruvec_page_state(page, NR_ANON_THPS); __mod_lruvec_page_state(page, NR_ANON_MAPPED, nr); } if (unlikely(PageKsm(page))) { unlock_page_memcg(page); return; } /* address might be in next vma when migration races vma_adjust */ if (first) __page_set_anon_rmap(page, vma, address, flags & RMAP_EXCLUSIVE); else __page_check_anon_rmap(page, vma, address); } /** * page_add_new_anon_rmap - add pte mapping to a new anonymous page * @page: the page to add the mapping to * @vma: the vm area in which the mapping is added * @address: the user virtual address mapped * @compound: charge the page as compound or small page * * Same as page_add_anon_rmap but must only be called on *new* pages. * This means the inc-and-test can be bypassed. * Page does not have to be locked. */ void page_add_new_anon_rmap(struct page *page, struct vm_area_struct *vma, unsigned long address, bool compound) { int nr = compound ? thp_nr_pages(page) : 1; VM_BUG_ON_VMA(address < vma->vm_start || address >= vma->vm_end, vma); __SetPageSwapBacked(page); if (compound) { VM_BUG_ON_PAGE(!PageTransHuge(page), page); /* increment count (starts at -1) */ atomic_set(compound_mapcount_ptr(page), 0); if (hpage_pincount_available(page)) atomic_set(compound_pincount_ptr(page), 0); __inc_lruvec_page_state(page, NR_ANON_THPS); } else { /* Anon THP always mapped first with PMD */ VM_BUG_ON_PAGE(PageTransCompound(page), page); /* increment count (starts at -1) */ atomic_set(&page->_mapcount, 0); } __mod_lruvec_page_state(page, NR_ANON_MAPPED, nr); __page_set_anon_rmap(page, vma, address, 1); } /** * page_add_file_rmap - add pte mapping to a file page * @page: the page to add the mapping to * @compound: charge the page as compound or small page * * The caller needs to hold the pte lock. */ void page_add_file_rmap(struct page *page, bool compound) { int i, nr = 1; VM_BUG_ON_PAGE(compound && !PageTransHuge(page), page); lock_page_memcg(page); if (compound && PageTransHuge(page)) { for (i = 0, nr = 0; i < thp_nr_pages(page); i++) { if (atomic_inc_and_test(&page[i]._mapcount)) nr++; } if (!atomic_inc_and_test(compound_mapcount_ptr(page))) goto out; if (PageSwapBacked(page)) __inc_node_page_state(page, NR_SHMEM_PMDMAPPED); else __inc_node_page_state(page, NR_FILE_PMDMAPPED); } else { if (PageTransCompound(page) && page_mapping(page)) { VM_WARN_ON_ONCE(!PageLocked(page)); SetPageDoubleMap(compound_head(page)); if (PageMlocked(page)) clear_page_mlock(compound_head(page)); } if (!atomic_inc_and_test(&page->_mapcount)) goto out; } __mod_lruvec_page_state(page, NR_FILE_MAPPED, nr); out: unlock_page_memcg(page); } static void page_remove_file_rmap(struct page *page, bool compound) { int i, nr = 1; VM_BUG_ON_PAGE(compound && !PageHead(page), page); /* Hugepages are not counted in NR_FILE_MAPPED for now. */ if (unlikely(PageHuge(page))) { /* hugetlb pages are always mapped with pmds */ atomic_dec(compound_mapcount_ptr(page)); return; } /* page still mapped by someone else? */ if (compound && PageTransHuge(page)) { for (i = 0, nr = 0; i < thp_nr_pages(page); i++) { if (atomic_add_negative(-1, &page[i]._mapcount)) nr++; } if (!atomic_add_negative(-1, compound_mapcount_ptr(page))) return; if (PageSwapBacked(page)) __dec_node_page_state(page, NR_SHMEM_PMDMAPPED); else __dec_node_page_state(page, NR_FILE_PMDMAPPED); } else { if (!atomic_add_negative(-1, &page->_mapcount)) return; } /* * We use the irq-unsafe __{inc|mod}_lruvec_page_state because * these counters are not modified in interrupt context, and * pte lock(a spinlock) is held, which implies preemption disabled. */ __mod_lruvec_page_state(page, NR_FILE_MAPPED, -nr); if (unlikely(PageMlocked(page))) clear_page_mlock(page); } static void page_remove_anon_compound_rmap(struct page *page) { int i, nr; if (!atomic_add_negative(-1, compound_mapcount_ptr(page))) return; /* Hugepages are not counted in NR_ANON_PAGES for now. */ if (unlikely(PageHuge(page))) return; if (!IS_ENABLED(CONFIG_TRANSPARENT_HUGEPAGE)) return; __dec_lruvec_page_state(page, NR_ANON_THPS); if (TestClearPageDoubleMap(page)) { /* * Subpages can be mapped with PTEs too. Check how many of * them are still mapped. */ for (i = 0, nr = 0; i < thp_nr_pages(page); i++) { if (atomic_add_negative(-1, &page[i]._mapcount)) nr++; } /* * Queue the page for deferred split if at least one small * page of the compound page is unmapped, but at least one * small page is still mapped. */ if (nr && nr < thp_nr_pages(page)) deferred_split_huge_page(page); } else { nr = thp_nr_pages(page); } if (unlikely(PageMlocked(page))) clear_page_mlock(page); if (nr) __mod_lruvec_page_state(page, NR_ANON_MAPPED, -nr); } /** * page_remove_rmap - take down pte mapping from a page * @page: page to remove mapping from * @compound: uncharge the page as compound or small page * * The caller needs to hold the pte lock. */ void page_remove_rmap(struct page *page, bool compound) { lock_page_memcg(page); if (!PageAnon(page)) { page_remove_file_rmap(page, compound); goto out; } if (compound) { page_remove_anon_compound_rmap(page); goto out; } /* page still mapped by someone else? */ if (!atomic_add_negative(-1, &page->_mapcount)) goto out; /* * We use the irq-unsafe __{inc|mod}_zone_page_stat because * these counters are not modified in interrupt context, and * pte lock(a spinlock) is held, which implies preemption disabled. */ __dec_lruvec_page_state(page, NR_ANON_MAPPED); if (unlikely(PageMlocked(page))) clear_page_mlock(page); if (PageTransCompound(page)) deferred_split_huge_page(compound_head(page)); /* * It would be tidy to reset the PageAnon mapping here, * but that might overwrite a racing page_add_anon_rmap * which increments mapcount after us but sets mapping * before us: so leave the reset to free_unref_page, * and remember that it's only reliable while mapped. * Leaving it set also helps swapoff to reinstate ptes * faster for those pages still in swapcache. */ out: unlock_page_memcg(page); } /* * @arg: enum ttu_flags will be passed to this argument */ static bool try_to_unmap_one(struct page *page, struct vm_area_struct *vma, unsigned long address, void *arg) { struct mm_struct *mm = vma->vm_mm; struct page_vma_mapped_walk pvmw = { .page = page, .vma = vma, .address = address, }; pte_t pteval; struct page *subpage; bool ret = true; struct mmu_notifier_range range; enum ttu_flags flags = (enum ttu_flags)(long)arg; /* * When racing against e.g. zap_pte_range() on another cpu, * in between its ptep_get_and_clear_full() and page_remove_rmap(), * try_to_unmap() may return false when it is about to become true, * if page table locking is skipped: use TTU_SYNC to wait for that. */ if (flags & TTU_SYNC) pvmw.flags = PVMW_SYNC; /* munlock has nothing to gain from examining un-locked vmas */ if ((flags & TTU_MUNLOCK) && !(vma->vm_flags & VM_LOCKED)) return true; if (IS_ENABLED(CONFIG_MIGRATION) && (flags & TTU_MIGRATION) && is_zone_device_page(page) && !is_device_private_page(page)) return true; if (flags & TTU_SPLIT_HUGE_PMD) { split_huge_pmd_address(vma, address, flags & TTU_SPLIT_FREEZE, page); } /* * For THP, we have to assume the worse case ie pmd for invalidation. * For hugetlb, it could be much worse if we need to do pud * invalidation in the case of pmd sharing. * * Note that the page can not be free in this function as call of * try_to_unmap() must hold a reference on the page. */ range.end = PageKsm(page) ? address + PAGE_SIZE : vma_address_end(page, vma); mmu_notifier_range_init(&range, MMU_NOTIFY_CLEAR, 0, vma, vma->vm_mm, address, range.end); if (PageHuge(page)) { /* * If sharing is possible, start and end will be adjusted * accordingly. */ adjust_range_if_pmd_sharing_possible(vma, &range.start, &range.end); } mmu_notifier_invalidate_range_start(&range); while (page_vma_mapped_walk(&pvmw)) { #ifdef CONFIG_ARCH_ENABLE_THP_MIGRATION /* PMD-mapped THP migration entry */ if (!pvmw.pte && (flags & TTU_MIGRATION)) { VM_BUG_ON_PAGE(PageHuge(page) || !PageTransCompound(page), page); set_pmd_migration_entry(&pvmw, page); continue; } #endif /* * If the page is mlock()d, we cannot swap it out. * If it's recently referenced (perhaps page_referenced * skipped over this mm) then we should reactivate it. */ if (!(flags & TTU_IGNORE_MLOCK)) { if (vma->vm_flags & VM_LOCKED) { /* PTE-mapped THP are never mlocked */ if (!PageTransCompound(page)) { /* * Holding pte lock, we do *not* need * mmap_lock here */ mlock_vma_page(page); } ret = false; page_vma_mapped_walk_done(&pvmw); break; } if (flags & TTU_MUNLOCK) continue; } /* Unexpected PMD-mapped THP? */ VM_BUG_ON_PAGE(!pvmw.pte, page); subpage = page - page_to_pfn(page) + pte_pfn(*pvmw.pte); address = pvmw.address; if (PageHuge(page) && !PageAnon(page)) { /* * To call huge_pmd_unshare, i_mmap_rwsem must be * held in write mode. Caller needs to explicitly * do this outside rmap routines. */ VM_BUG_ON(!(flags & TTU_RMAP_LOCKED)); if (huge_pmd_unshare(mm, vma, &address, pvmw.pte)) { /* * huge_pmd_unshare unmapped an entire PMD * page. There is no way of knowing exactly * which PMDs may be cached for this mm, so * we must flush them all. start/end were * already adjusted above to cover this range. */ flush_cache_range(vma, range.start, range.end); flush_tlb_range(vma, range.start, range.end); mmu_notifier_invalidate_range(mm, range.start, range.end); /* * The ref count of the PMD page was dropped * which is part of the way map counting * is done for shared PMDs. Return 'true' * here. When there is no other sharing, * huge_pmd_unshare returns false and we will * unmap the actual page and drop map count * to zero. */ page_vma_mapped_walk_done(&pvmw); break; } } if (IS_ENABLED(CONFIG_MIGRATION) && (flags & TTU_MIGRATION) && is_zone_device_page(page)) { swp_entry_t entry; pte_t swp_pte; pteval = ptep_get_and_clear(mm, pvmw.address, pvmw.pte); /* * Store the pfn of the page in a special migration * pte. do_swap_page() will wait until the migration * pte is removed and then restart fault handling. */ entry = make_migration_entry(page, 0); swp_pte = swp_entry_to_pte(entry); /* * pteval maps a zone device page and is therefore * a swap pte. */ if (pte_swp_soft_dirty(pteval)) swp_pte = pte_swp_mksoft_dirty(swp_pte); if (pte_swp_uffd_wp(pteval)) swp_pte = pte_swp_mkuffd_wp(swp_pte); set_pte_at(mm, pvmw.address, pvmw.pte, swp_pte); /* * No need to invalidate here it will synchronize on * against the special swap migration pte. * * The assignment to subpage above was computed from a * swap PTE which results in an invalid pointer. * Since only PAGE_SIZE pages can currently be * migrated, just set it to page. This will need to be * changed when hugepage migrations to device private * memory are supported. */ subpage = page; goto discard; } /* Nuke the page table entry. */ flush_cache_page(vma, address, pte_pfn(*pvmw.pte)); if (should_defer_flush(mm, flags)) { /* * We clear the PTE but do not flush so potentially * a remote CPU could still be writing to the page. * If the entry was previously clean then the * architecture must guarantee that a clear->dirty * transition on a cached TLB entry is written through * and traps if the PTE is unmapped. */ pteval = ptep_get_and_clear(mm, address, pvmw.pte); set_tlb_ubc_flush_pending(mm, pte_dirty(pteval)); } else { pteval = ptep_clear_flush(vma, address, pvmw.pte); } /* Move the dirty bit to the page. Now the pte is gone. */ if (pte_dirty(pteval)) set_page_dirty(page); /* Update high watermark before we lower rss */ update_hiwater_rss(mm); if (PageHWPoison(page) && !(flags & TTU_IGNORE_HWPOISON)) { pteval = swp_entry_to_pte(make_hwpoison_entry(subpage)); if (PageHuge(page)) { hugetlb_count_sub(compound_nr(page), mm); set_huge_swap_pte_at(mm, address, pvmw.pte, pteval, vma_mmu_pagesize(vma)); } else { dec_mm_counter(mm, mm_counter(page)); set_pte_at(mm, address, pvmw.pte, pteval); } } else if (pte_unused(pteval) && !userfaultfd_armed(vma)) { /* * The guest indicated that the page content is of no * interest anymore. Simply discard the pte, vmscan * will take care of the rest. * A future reference will then fault in a new zero * page. When userfaultfd is active, we must not drop * this page though, as its main user (postcopy * migration) will not expect userfaults on already * copied pages. */ dec_mm_counter(mm, mm_counter(page)); /* We have to invalidate as we cleared the pte */ mmu_notifier_invalidate_range(mm, address, address + PAGE_SIZE); } else if (IS_ENABLED(CONFIG_MIGRATION) && (flags & (TTU_MIGRATION|TTU_SPLIT_FREEZE))) { swp_entry_t entry; pte_t swp_pte; if (arch_unmap_one(mm, vma, address, pteval) < 0) { set_pte_at(mm, address, pvmw.pte, pteval); ret = false; page_vma_mapped_walk_done(&pvmw); break; } /* * Store the pfn of the page in a special migration * pte. do_swap_page() will wait until the migration * pte is removed and then restart fault handling. */ entry = make_migration_entry(subpage, pte_write(pteval)); swp_pte = swp_entry_to_pte(entry); if (pte_soft_dirty(pteval)) swp_pte = pte_swp_mksoft_dirty(swp_pte); if (pte_uffd_wp(pteval)) swp_pte = pte_swp_mkuffd_wp(swp_pte); set_pte_at(mm, address, pvmw.pte, swp_pte); /* * No need to invalidate here it will synchronize on * against the special swap migration pte. */ } else if (PageAnon(page)) { swp_entry_t entry = { .val = page_private(subpage) }; pte_t swp_pte; /* * Store the swap location in the pte. * See handle_pte_fault() ... */ if (unlikely(PageSwapBacked(page) != PageSwapCache(page))) { WARN_ON_ONCE(1); ret = false; /* We have to invalidate as we cleared the pte */ mmu_notifier_invalidate_range(mm, address, address + PAGE_SIZE); page_vma_mapped_walk_done(&pvmw); break; } /* MADV_FREE page check */ if (!PageSwapBacked(page)) { if (!PageDirty(page)) { /* Invalidate as we cleared the pte */ mmu_notifier_invalidate_range(mm, address, address + PAGE_SIZE); dec_mm_counter(mm, MM_ANONPAGES); goto discard; } /* * If the page was redirtied, it cannot be * discarded. Remap the page to page table. */ set_pte_at(mm, address, pvmw.pte, pteval); SetPageSwapBacked(page); ret = false; page_vma_mapped_walk_done(&pvmw); break; } if (swap_duplicate(entry) < 0) { set_pte_at(mm, address, pvmw.pte, pteval); ret = false; page_vma_mapped_walk_done(&pvmw); break; } if (arch_unmap_one(mm, vma, address, pteval) < 0) { set_pte_at(mm, address, pvmw.pte, pteval); ret = false; page_vma_mapped_walk_done(&pvmw); break; } if (list_empty(&mm->mmlist)) { spin_lock(&mmlist_lock); if (list_empty(&mm->mmlist)) list_add(&mm->mmlist, &init_mm.mmlist); spin_unlock(&mmlist_lock); } dec_mm_counter(mm, MM_ANONPAGES); inc_mm_counter(mm, MM_SWAPENTS); swp_pte = swp_entry_to_pte(entry); if (pte_soft_dirty(pteval)) swp_pte = pte_swp_mksoft_dirty(swp_pte); if (pte_uffd_wp(pteval)) swp_pte = pte_swp_mkuffd_wp(swp_pte); set_pte_at(mm, address, pvmw.pte, swp_pte); /* Invalidate as we cleared the pte */ mmu_notifier_invalidate_range(mm, address, address + PAGE_SIZE); } else { /* * This is a locked file-backed page, thus it cannot * be removed from the page cache and replaced by a new * page before mmu_notifier_invalidate_range_end, so no * concurrent thread might update its page table to * point at new page while a device still is using this * page. * * See Documentation/vm/mmu_notifier.rst */ dec_mm_counter(mm, mm_counter_file(page)); } discard: /* * No need to call mmu_notifier_invalidate_range() it has be * done above for all cases requiring it to happen under page * table lock before mmu_notifier_invalidate_range_end() * * See Documentation/vm/mmu_notifier.rst */ page_remove_rmap(subpage, PageHuge(page)); put_page(page); } mmu_notifier_invalidate_range_end(&range); return ret; } static bool invalid_migration_vma(struct vm_area_struct *vma, void *arg) { return vma_is_temporary_stack(vma); } static int page_not_mapped(struct page *page) { return !page_mapped(page); } /** * try_to_unmap - try to remove all page table mappings to a page * @page: the page to get unmapped * @flags: action and flags * * Tries to remove all the page table entries which are mapping this * page, used in the pageout path. Caller must hold the page lock. * * If unmap is successful, return true. Otherwise, false. */ bool try_to_unmap(struct page *page, enum ttu_flags flags) { struct rmap_walk_control rwc = { .rmap_one = try_to_unmap_one, .arg = (void *)flags, .done = page_not_mapped, .anon_lock = page_lock_anon_vma_read, }; /* * During exec, a temporary VMA is setup and later moved. * The VMA is moved under the anon_vma lock but not the * page tables leading to a race where migration cannot * find the migration ptes. Rather than increasing the * locking requirements of exec(), migration skips * temporary VMAs until after exec() completes. */ if ((flags & (TTU_MIGRATION|TTU_SPLIT_FREEZE)) && !PageKsm(page) && PageAnon(page)) rwc.invalid_vma = invalid_migration_vma; if (flags & TTU_RMAP_LOCKED) rmap_walk_locked(page, &rwc); else rmap_walk(page, &rwc); /* * When racing against e.g. zap_pte_range() on another cpu, * in between its ptep_get_and_clear_full() and page_remove_rmap(), * try_to_unmap() may return false when it is about to become true, * if page table locking is skipped: use TTU_SYNC to wait for that. */ return !page_mapcount(page); } /** * try_to_munlock - try to munlock a page * @page: the page to be munlocked * * Called from munlock code. Checks all of the VMAs mapping the page * to make sure nobody else has this page mlocked. The page will be * returned with PG_mlocked cleared if no other vmas have it mlocked. */ void try_to_munlock(struct page *page) { struct rmap_walk_control rwc = { .rmap_one = try_to_unmap_one, .arg = (void *)TTU_MUNLOCK, .done = page_not_mapped, .anon_lock = page_lock_anon_vma_read, }; VM_BUG_ON_PAGE(!PageLocked(page) || PageLRU(page), page); VM_BUG_ON_PAGE(PageCompound(page) && PageDoubleMap(page), page); rmap_walk(page, &rwc); } void __put_anon_vma(struct anon_vma *anon_vma) { struct anon_vma *root = anon_vma->root; anon_vma_free(anon_vma); if (root != anon_vma && atomic_dec_and_test(&root->refcount)) anon_vma_free(root); } static struct anon_vma *rmap_walk_anon_lock(struct page *page, struct rmap_walk_control *rwc) { struct anon_vma *anon_vma; if (rwc->anon_lock) return rwc->anon_lock(page); /* * Note: remove_migration_ptes() cannot use page_lock_anon_vma_read() * because that depends on page_mapped(); but not all its usages * are holding mmap_lock. Users without mmap_lock are required to * take a reference count to prevent the anon_vma disappearing */ anon_vma = page_anon_vma(page); if (!anon_vma) return NULL; anon_vma_lock_read(anon_vma); return anon_vma; } /* * rmap_walk_anon - do something to anonymous page using the object-based * rmap method * @page: the page to be handled * @rwc: control variable according to each walk type * * Find all the mappings of a page using the mapping pointer and the vma chains * contained in the anon_vma struct it points to. * * When called from try_to_munlock(), the mmap_lock of the mm containing the vma * where the page was found will be held for write. So, we won't recheck * vm_flags for that VMA. That should be OK, because that vma shouldn't be * LOCKED. */ static void rmap_walk_anon(struct page *page, struct rmap_walk_control *rwc, bool locked) { struct anon_vma *anon_vma; pgoff_t pgoff_start, pgoff_end; struct anon_vma_chain *avc; if (locked) { anon_vma = page_anon_vma(page); /* anon_vma disappear under us? */ VM_BUG_ON_PAGE(!anon_vma, page); } else { anon_vma = rmap_walk_anon_lock(page, rwc); } if (!anon_vma) return; pgoff_start = page_to_pgoff(page); pgoff_end = pgoff_start + thp_nr_pages(page) - 1; anon_vma_interval_tree_foreach(avc, &anon_vma->rb_root, pgoff_start, pgoff_end) { struct vm_area_struct *vma = avc->vma; unsigned long address = vma_address(page, vma); VM_BUG_ON_VMA(address == -EFAULT, vma); cond_resched(); if (rwc->invalid_vma && rwc->invalid_vma(vma, rwc->arg)) continue; if (!rwc->rmap_one(page, vma, address, rwc->arg)) break; if (rwc->done && rwc->done(page)) break; } if (!locked) anon_vma_unlock_read(anon_vma); } /* * rmap_walk_file - do something to file page using the object-based rmap method * @page: the page to be handled * @rwc: control variable according to each walk type * * Find all the mappings of a page using the mapping pointer and the vma chains * contained in the address_space struct it points to. * * When called from try_to_munlock(), the mmap_lock of the mm containing the vma * where the page was found will be held for write. So, we won't recheck * vm_flags for that VMA. That should be OK, because that vma shouldn't be * LOCKED. */ static void rmap_walk_file(struct page *page, struct rmap_walk_control *rwc, bool locked) { struct address_space *mapping = page_mapping(page); pgoff_t pgoff_start, pgoff_end; struct vm_area_struct *vma; /* * The page lock not only makes sure that page->mapping cannot * suddenly be NULLified by truncation, it makes sure that the * structure at mapping cannot be freed and reused yet, * so we can safely take mapping->i_mmap_rwsem. */ VM_BUG_ON_PAGE(!PageLocked(page), page); if (!mapping) return; pgoff_start = page_to_pgoff(page); pgoff_end = pgoff_start + thp_nr_pages(page) - 1; if (!locked) i_mmap_lock_read(mapping); vma_interval_tree_foreach(vma, &mapping->i_mmap, pgoff_start, pgoff_end) { unsigned long address = vma_address(page, vma); VM_BUG_ON_VMA(address == -EFAULT, vma); cond_resched(); if (rwc->invalid_vma && rwc->invalid_vma(vma, rwc->arg)) continue; if (!rwc->rmap_one(page, vma, address, rwc->arg)) goto done; if (rwc->done && rwc->done(page)) goto done; } done: if (!locked) i_mmap_unlock_read(mapping); } void rmap_walk(struct page *page, struct rmap_walk_control *rwc) { if (unlikely(PageKsm(page))) rmap_walk_ksm(page, rwc); else if (PageAnon(page)) rmap_walk_anon(page, rwc, false); else rmap_walk_file(page, rwc, false); } /* Like rmap_walk, but caller holds relevant rmap lock */ void rmap_walk_locked(struct page *page, struct rmap_walk_control *rwc) { /* no ksm support for now */ VM_BUG_ON_PAGE(PageKsm(page), page); if (PageAnon(page)) rmap_walk_anon(page, rwc, true); else rmap_walk_file(page, rwc, true); } #ifdef CONFIG_HUGETLB_PAGE /* * The following two functions are for anonymous (private mapped) hugepages. * Unlike common anonymous pages, anonymous hugepages have no accounting code * and no lru code, because we handle hugepages differently from common pages. */ void hugepage_add_anon_rmap(struct page *page, struct vm_area_struct *vma, unsigned long address) { struct anon_vma *anon_vma = vma->anon_vma; int first; BUG_ON(!PageLocked(page)); BUG_ON(!anon_vma); /* address might be in next vma when migration races vma_adjust */ first = atomic_inc_and_test(compound_mapcount_ptr(page)); if (first) __page_set_anon_rmap(page, vma, address, 0); } void hugepage_add_new_anon_rmap(struct page *page, struct vm_area_struct *vma, unsigned long address) { BUG_ON(address < vma->vm_start || address >= vma->vm_end); atomic_set(compound_mapcount_ptr(page), 0); if (hpage_pincount_available(page)) atomic_set(compound_pincount_ptr(page), 0); __page_set_anon_rmap(page, vma, address, 1); } #endif /* CONFIG_HUGETLB_PAGE */
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 /* SPDX-License-Identifier: GPL-2.0 */ /* Freezer declarations */ #ifndef FREEZER_H_INCLUDED #define FREEZER_H_INCLUDED #include <linux/debug_locks.h> #include <linux/sched.h> #include <linux/wait.h> #include <linux/atomic.h> #ifdef CONFIG_FREEZER extern atomic_t system_freezing_cnt; /* nr of freezing conds in effect */ extern bool pm_freezing; /* PM freezing in effect */ extern bool pm_nosig_freezing; /* PM nosig freezing in effect */ /* * Timeout for stopping processes */ extern unsigned int freeze_timeout_msecs; /* * Check if a process has been frozen */ static inline bool frozen(struct task_struct *p) { return p->flags & PF_FROZEN; } extern bool freezing_slow_path(struct task_struct *p); /* * Check if there is a request to freeze a process */ static inline bool freezing(struct task_struct *p) { if (likely(!atomic_read(&system_freezing_cnt))) return false; return freezing_slow_path(p); } /* Takes and releases task alloc lock using task_lock() */ extern void __thaw_task(struct task_struct *t); extern bool __refrigerator(bool check_kthr_stop); extern int freeze_processes(void); extern int freeze_kernel_threads(void); extern void thaw_processes(void); extern void thaw_kernel_threads(void); /* * DO NOT ADD ANY NEW CALLERS OF THIS FUNCTION * If try_to_freeze causes a lockdep warning it means the caller may deadlock */ static inline bool try_to_freeze_unsafe(void) { might_sleep(); if (likely(!freezing(current))) return false; return __refrigerator(false); } static inline bool try_to_freeze(void) { if (!(current->flags & PF_NOFREEZE)) debug_check_no_locks_held(); return try_to_freeze_unsafe(); } extern bool freeze_task(struct task_struct *p); extern bool set_freezable(void); #ifdef CONFIG_CGROUP_FREEZER extern bool cgroup_freezing(struct task_struct *task); #else /* !CONFIG_CGROUP_FREEZER */ static inline bool cgroup_freezing(struct task_struct *task) { return false; } #endif /* !CONFIG_CGROUP_FREEZER */ /* * The PF_FREEZER_SKIP flag should be set by a vfork parent right before it * calls wait_for_completion(&vfork) and reset right after it returns from this * function. Next, the parent should call try_to_freeze() to freeze itself * appropriately in case the child has exited before the freezing of tasks is * complete. However, we don't want kernel threads to be frozen in unexpected * places, so we allow them to block freeze_processes() instead or to set * PF_NOFREEZE if needed. Fortunately, in the ____call_usermodehelper() case the * parent won't really block freeze_processes(), since ____call_usermodehelper() * (the child) does a little before exec/exit and it can't be frozen before * waking up the parent. */ /** * freezer_do_not_count - tell freezer to ignore %current * * Tell freezers to ignore the current task when determining whether the * target frozen state is reached. IOW, the current task will be * considered frozen enough by freezers. * * The caller shouldn't do anything which isn't allowed for a frozen task * until freezer_cont() is called. Usually, freezer[_do_not]_count() pair * wrap a scheduling operation and nothing much else. */ static inline void freezer_do_not_count(void) { current->flags |= PF_FREEZER_SKIP; } /** * freezer_count - tell freezer to stop ignoring %current * * Undo freezer_do_not_count(). It tells freezers that %current should be * considered again and tries to freeze if freezing condition is already in * effect. */ static inline void freezer_count(void) { current->flags &= ~PF_FREEZER_SKIP; /* * If freezing is in progress, the following paired with smp_mb() * in freezer_should_skip() ensures that either we see %true * freezing() or freezer_should_skip() sees !PF_FREEZER_SKIP. */ smp_mb(); try_to_freeze(); } /* DO NOT ADD ANY NEW CALLERS OF THIS FUNCTION */ static inline void freezer_count_unsafe(void) { current->flags &= ~PF_FREEZER_SKIP; smp_mb(); try_to_freeze_unsafe(); } /** * freezer_should_skip - whether to skip a task when determining frozen * state is reached * @p: task in quesion * * This function is used by freezers after establishing %true freezing() to * test whether a task should be skipped when determining the target frozen * state is reached. IOW, if this function returns %true, @p is considered * frozen enough. */ static inline bool freezer_should_skip(struct task_struct *p) { /* * The following smp_mb() paired with the one in freezer_count() * ensures that either freezer_count() sees %true freezing() or we * see cleared %PF_FREEZER_SKIP and return %false. This makes it * impossible for a task to slip frozen state testing after * clearing %PF_FREEZER_SKIP. */ smp_mb(); return p->flags & PF_FREEZER_SKIP; } /* * These functions are intended to be used whenever you want allow a sleeping * task to be frozen. Note that neither return any clear indication of * whether a freeze event happened while in this function. */ /* Like schedule(), but should not block the freezer. */ static inline void freezable_schedule(void) { freezer_do_not_count(); schedule(); freezer_count(); } /* DO NOT ADD ANY NEW CALLERS OF THIS FUNCTION */ static inline void freezable_schedule_unsafe(void) { freezer_do_not_count(); schedule(); freezer_count_unsafe(); } /* * Like schedule_timeout(), but should not block the freezer. Do not * call this with locks held. */ static inline long freezable_schedule_timeout(long timeout) { long __retval; freezer_do_not_count(); __retval = schedule_timeout(timeout); freezer_count(); return __retval; } /* * Like schedule_timeout_interruptible(), but should not block the freezer. Do not * call this with locks held. */ static inline long freezable_schedule_timeout_interruptible(long timeout) { long __retval; freezer_do_not_count(); __retval = schedule_timeout_interruptible(timeout); freezer_count(); return __retval; } /* DO NOT ADD ANY NEW CALLERS OF THIS FUNCTION */ static inline long freezable_schedule_timeout_interruptible_unsafe(long timeout) { long __retval; freezer_do_not_count(); __retval = schedule_timeout_interruptible(timeout); freezer_count_unsafe(); return __retval; } /* Like schedule_timeout_killable(), but should not block the freezer. */ static inline long freezable_schedule_timeout_killable(long timeout) { long __retval; freezer_do_not_count(); __retval = schedule_timeout_killable(timeout); freezer_count(); return __retval; } /* DO NOT ADD ANY NEW CALLERS OF THIS FUNCTION */ static inline long freezable_schedule_timeout_killable_unsafe(long timeout) { long __retval; freezer_do_not_count(); __retval = schedule_timeout_killable(timeout); freezer_count_unsafe(); return __retval; } /* * Like schedule_hrtimeout_range(), but should not block the freezer. Do not * call this with locks held. */ static inline int freezable_schedule_hrtimeout_range(ktime_t *expires, u64 delta, const enum hrtimer_mode mode) { int __retval; freezer_do_not_count(); __retval = schedule_hrtimeout_range(expires, delta, mode); freezer_count(); return __retval; } /* * Freezer-friendly wrappers around wait_event_interruptible(), * wait_event_killable() and wait_event_interruptible_timeout(), originally * defined in <linux/wait.h> */ /* DO NOT ADD ANY NEW CALLERS OF THIS FUNCTION */ #define wait_event_freezekillable_unsafe(wq, condition) \ ({ \ int __retval; \ freezer_do_not_count(); \ __retval = wait_event_killable(wq, (condition)); \ freezer_count_unsafe(); \ __retval; \ }) #else /* !CONFIG_FREEZER */ static inline bool frozen(struct task_struct *p) { return false; } static inline bool freezing(struct task_struct *p) { return false; } static inline void __thaw_task(struct task_struct *t) {} static inline bool __refrigerator(bool check_kthr_stop) { return false; } static inline int freeze_processes(void) { return -ENOSYS; } static inline int freeze_kernel_threads(void) { return -ENOSYS; } static inline void thaw_processes(void) {} static inline void thaw_kernel_threads(void) {} static inline bool try_to_freeze_nowarn(void) { return false; } static inline bool try_to_freeze(void) { return false; } static inline void freezer_do_not_count(void) {} static inline void freezer_count(void) {} static inline int freezer_should_skip(struct task_struct *p) { return 0; } static inline void set_freezable(void) {} #define freezable_schedule() schedule() #define freezable_schedule_unsafe() schedule() #define freezable_schedule_timeout(timeout) schedule_timeout(timeout) #define freezable_schedule_timeout_interruptible(timeout) \ schedule_timeout_interruptible(timeout) #define freezable_schedule_timeout_interruptible_unsafe(timeout) \ schedule_timeout_interruptible(timeout) #define freezable_schedule_timeout_killable(timeout) \ schedule_timeout_killable(timeout) #define freezable_schedule_timeout_killable_unsafe(timeout) \ schedule_timeout_killable(timeout) #define freezable_schedule_hrtimeout_range(expires, delta, mode) \ schedule_hrtimeout_range(expires, delta, mode) #define wait_event_freezekillable_unsafe(wq, condition) \ wait_event_killable(wq, condition) #endif /* !CONFIG_FREEZER */ #endif /* FREEZER_H_INCLUDED */
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 /* SPDX-License-Identifier: GPL-2.0 */ #ifndef _LINUX_PGALLLC_TRACK_H #define _LINUX_PGALLLC_TRACK_H #if defined(CONFIG_MMU) static inline p4d_t *p4d_alloc_track(struct mm_struct *mm, pgd_t *pgd, unsigned long address, pgtbl_mod_mask *mod_mask) { if (unlikely(pgd_none(*pgd))) { if (__p4d_alloc(mm, pgd, address)) return NULL; *mod_mask |= PGTBL_PGD_MODIFIED; } return p4d_offset(pgd, address); } static inline pud_t *pud_alloc_track(struct mm_struct *mm, p4d_t *p4d, unsigned long address, pgtbl_mod_mask *mod_mask) { if (unlikely(p4d_none(*p4d))) { if (__pud_alloc(mm, p4d, address)) return NULL; *mod_mask |= PGTBL_P4D_MODIFIED; } return pud_offset(p4d, address); } static inline pmd_t *pmd_alloc_track(struct mm_struct *mm, pud_t *pud, unsigned long address, pgtbl_mod_mask *mod_mask) { if (unlikely(pud_none(*pud))) { if (__pmd_alloc(mm, pud, address)) return NULL; *mod_mask |= PGTBL_PUD_MODIFIED; } return pmd_offset(pud, address); } #endif /* CONFIG_MMU */ #define pte_alloc_kernel_track(pmd, address, mask) \ ((unlikely(pmd_none(*(pmd))) && \ (__pte_alloc_kernel(pmd) || ({*(mask)|=PGTBL_PMD_MODIFIED;0;})))?\ NULL: pte_offset_kernel(pmd, address)) #endif /* _LINUX_PGALLLC_TRACK_H */
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 /* SPDX-License-Identifier: GPL-2.0 */ /* * linux/include/linux/nmi.h */ #ifndef LINUX_NMI_H #define LINUX_NMI_H #include <linux/sched.h> #include <asm/irq.h> #if defined(CONFIG_HAVE_NMI_WATCHDOG) #include <asm/nmi.h> #endif #ifdef CONFIG_LOCKUP_DETECTOR void lockup_detector_init(void); void lockup_detector_soft_poweroff(void); void lockup_detector_cleanup(void); bool is_hardlockup(void); extern int watchdog_user_enabled; extern int nmi_watchdog_user_enabled; extern int soft_watchdog_user_enabled; extern int watchdog_thresh; extern unsigned long watchdog_enabled; extern struct cpumask watchdog_cpumask; extern unsigned long *watchdog_cpumask_bits; #ifdef CONFIG_SMP extern int sysctl_softlockup_all_cpu_backtrace; extern int sysctl_hardlockup_all_cpu_backtrace; #else #define sysctl_softlockup_all_cpu_backtrace 0 #define sysctl_hardlockup_all_cpu_backtrace 0 #endif /* !CONFIG_SMP */ #else /* CONFIG_LOCKUP_DETECTOR */ static inline void lockup_detector_init(void) { } static inline void lockup_detector_soft_poweroff(void) { } static inline void lockup_detector_cleanup(void) { } #endif /* !CONFIG_LOCKUP_DETECTOR */ #ifdef CONFIG_SOFTLOCKUP_DETECTOR extern void touch_softlockup_watchdog_sched(void); extern void touch_softlockup_watchdog(void); extern void touch_softlockup_watchdog_sync(void); extern void touch_all_softlockup_watchdogs(void); extern unsigned int softlockup_panic; extern int lockup_detector_online_cpu(unsigned int cpu); extern int lockup_detector_offline_cpu(unsigned int cpu); #else /* CONFIG_SOFTLOCKUP_DETECTOR */ static inline void touch_softlockup_watchdog_sched(void) { } static inline void touch_softlockup_watchdog(void) { } static inline void touch_softlockup_watchdog_sync(void) { } static inline void touch_all_softlockup_watchdogs(void) { } #define lockup_detector_online_cpu NULL #define lockup_detector_offline_cpu NULL #endif /* CONFIG_SOFTLOCKUP_DETECTOR */ #ifdef CONFIG_DETECT_HUNG_TASK void reset_hung_task_detector(void); #else static inline void reset_hung_task_detector(void) { } #endif /* * The run state of the lockup detectors is controlled by the content of the * 'watchdog_enabled' variable. Each lockup detector has its dedicated bit - * bit 0 for the hard lockup detector and bit 1 for the soft lockup detector. * * 'watchdog_user_enabled', 'nmi_watchdog_user_enabled' and * 'soft_watchdog_user_enabled' are variables that are only used as an * 'interface' between the parameters in /proc/sys/kernel and the internal * state bits in 'watchdog_enabled'. The 'watchdog_thresh' variable is * handled differently because its value is not boolean, and the lockup * detectors are 'suspended' while 'watchdog_thresh' is equal zero. */ #define NMI_WATCHDOG_ENABLED_BIT 0 #define SOFT_WATCHDOG_ENABLED_BIT 1 #define NMI_WATCHDOG_ENABLED (1 << NMI_WATCHDOG_ENABLED_BIT) #define SOFT_WATCHDOG_ENABLED (1 << SOFT_WATCHDOG_ENABLED_BIT) #if defined(CONFIG_HARDLOCKUP_DETECTOR) extern void hardlockup_detector_disable(void); extern unsigned int hardlockup_panic; #else static inline void hardlockup_detector_disable(void) {} #endif #if defined(CONFIG_HAVE_NMI_WATCHDOG) || defined(CONFIG_HARDLOCKUP_DETECTOR) # define NMI_WATCHDOG_SYSCTL_PERM 0644 #else # define NMI_WATCHDOG_SYSCTL_PERM 0444 #endif #if defined(CONFIG_HARDLOCKUP_DETECTOR_PERF) extern void arch_touch_nmi_watchdog(void); extern void hardlockup_detector_perf_stop(void); extern void hardlockup_detector_perf_restart(void); extern void hardlockup_detector_perf_disable(void); extern void hardlockup_detector_perf_enable(void); extern void hardlockup_detector_perf_cleanup(void); extern int hardlockup_detector_perf_init(void); #else static inline void hardlockup_detector_perf_stop(void) { } static inline void hardlockup_detector_perf_restart(void) { } static inline void hardlockup_detector_perf_disable(void) { } static inline void hardlockup_detector_perf_enable(void) { } static inline void hardlockup_detector_perf_cleanup(void) { } # if !defined(CONFIG_HAVE_NMI_WATCHDOG) static inline int hardlockup_detector_perf_init(void) { return -ENODEV; } static inline void arch_touch_nmi_watchdog(void) {} # else static inline int hardlockup_detector_perf_init(void) { return 0; } # endif #endif void watchdog_nmi_stop(void); void watchdog_nmi_start(void); int watchdog_nmi_probe(void); int watchdog_nmi_enable(unsigned int cpu); void watchdog_nmi_disable(unsigned int cpu); /** * touch_nmi_watchdog - restart NMI watchdog timeout. * * If the architecture supports the NMI watchdog, touch_nmi_watchdog() * may be used to reset the timeout - for code which intentionally * disables interrupts for a long time. This call is stateless. */ static inline void touch_nmi_watchdog(void) { arch_touch_nmi_watchdog(); touch_softlockup_watchdog(); } /* * Create trigger_all_cpu_backtrace() out of the arch-provided * base function. Return whether such support was available, * to allow calling code to fall back to some other mechanism: */ #ifdef arch_trigger_cpumask_backtrace static inline bool trigger_all_cpu_backtrace(void) { arch_trigger_cpumask_backtrace(cpu_online_mask, false); return true; } static inline bool trigger_allbutself_cpu_backtrace(void) { arch_trigger_cpumask_backtrace(cpu_online_mask, true); return true; } static inline bool trigger_cpumask_backtrace(struct cpumask *mask) { arch_trigger_cpumask_backtrace(mask, false); return true; } static inline bool trigger_single_cpu_backtrace(int cpu) { arch_trigger_cpumask_backtrace(cpumask_of(cpu), false); return true; } /* generic implementation */ void nmi_trigger_cpumask_backtrace(const cpumask_t *mask, bool exclude_self, void (*raise)(cpumask_t *mask)); bool nmi_cpu_backtrace(struct pt_regs *regs); #else static inline bool trigger_all_cpu_backtrace(void) { return false; } static inline bool trigger_allbutself_cpu_backtrace(void) { return false; } static inline bool trigger_cpumask_backtrace(struct cpumask *mask) { return false; } static inline bool trigger_single_cpu_backtrace(int cpu) { return false; } #endif #ifdef CONFIG_HARDLOCKUP_DETECTOR_PERF u64 hw_nmi_get_sample_period(int watchdog_thresh); #endif #if defined(CONFIG_HARDLOCKUP_CHECK_TIMESTAMP) && \ defined(CONFIG_HARDLOCKUP_DETECTOR) void watchdog_update_hrtimer_threshold(u64 period); #else static inline void watchdog_update_hrtimer_threshold(u64 period) { } #endif struct ctl_table; int proc_watchdog(struct ctl_table *, int, void *, size_t *, loff_t *); int proc_nmi_watchdog(struct ctl_table *, int , void *, size_t *, loff_t *); int proc_soft_watchdog(struct ctl_table *, int , void *, size_t *, loff_t *); int proc_watchdog_thresh(struct ctl_table *, int , void *, size_t *, loff_t *); int proc_watchdog_cpumask(struct ctl_table *, int, void *, size_t *, loff_t *); #ifdef CONFIG_HAVE_ACPI_APEI_NMI #include <asm/nmi.h> #endif #endif
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 /* SPDX-License-Identifier: GPL-2.0 */ #ifndef _LINUX_BH_H #define _LINUX_BH_H #include <linux/preempt.h> #ifdef CONFIG_TRACE_IRQFLAGS extern void __local_bh_disable_ip(unsigned long ip, unsigned int cnt); #else static __always_inline void __local_bh_disable_ip(unsigned long ip, unsigned int cnt) { preempt_count_add(cnt); barrier(); } #endif static inline void local_bh_disable(void) { __local_bh_disable_ip(_THIS_IP_, SOFTIRQ_DISABLE_OFFSET); } extern void _local_bh_enable(void); extern void __local_bh_enable_ip(unsigned long ip, unsigned int cnt); static inline void local_bh_enable_ip(unsigned long ip) { __local_bh_enable_ip(ip, SOFTIRQ_DISABLE_OFFSET); } static inline void local_bh_enable(void) { __local_bh_enable_ip(_THIS_IP_, SOFTIRQ_DISABLE_OFFSET); } #endif /* _LINUX_BH_H */
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 /* SPDX-License-Identifier: GPL-2.0 */ #ifndef _NET_NEIGHBOUR_H #define _NET_NEIGHBOUR_H #include <linux/neighbour.h> /* * Generic neighbour manipulation * * Authors: * Pedro Roque <roque@di.fc.ul.pt> * Alexey Kuznetsov <kuznet@ms2.inr.ac.ru> * * Changes: * * Harald Welte: <laforge@gnumonks.org> * - Add neighbour cache statistics like rtstat */ #include <linux/atomic.h> #include <linux/refcount.h> #include <linux/netdevice.h> #include <linux/skbuff.h> #include <linux/rcupdate.h> #include <linux/seq_file.h> #include <linux/bitmap.h> #include <linux/err.h> #include <linux/sysctl.h> #include <linux/workqueue.h> #include <net/rtnetlink.h> /* * NUD stands for "neighbor unreachability detection" */ #define NUD_IN_TIMER (NUD_INCOMPLETE|NUD_REACHABLE|NUD_DELAY|NUD_PROBE) #define NUD_VALID (NUD_PERMANENT|NUD_NOARP|NUD_REACHABLE|NUD_PROBE|NUD_STALE|NUD_DELAY) #define NUD_CONNECTED (NUD_PERMANENT|NUD_NOARP|NUD_REACHABLE) struct neighbour; enum { NEIGH_VAR_MCAST_PROBES, NEIGH_VAR_UCAST_PROBES, NEIGH_VAR_APP_PROBES, NEIGH_VAR_MCAST_REPROBES, NEIGH_VAR_RETRANS_TIME, NEIGH_VAR_BASE_REACHABLE_TIME, NEIGH_VAR_DELAY_PROBE_TIME, NEIGH_VAR_GC_STALETIME, NEIGH_VAR_QUEUE_LEN_BYTES, NEIGH_VAR_PROXY_QLEN, NEIGH_VAR_ANYCAST_DELAY, NEIGH_VAR_PROXY_DELAY, NEIGH_VAR_LOCKTIME, #define NEIGH_VAR_DATA_MAX (NEIGH_VAR_LOCKTIME + 1) /* Following are used as a second way to access one of the above */ NEIGH_VAR_QUEUE_LEN, /* same data as NEIGH_VAR_QUEUE_LEN_BYTES */ NEIGH_VAR_RETRANS_TIME_MS, /* same data as NEIGH_VAR_RETRANS_TIME */ NEIGH_VAR_BASE_REACHABLE_TIME_MS, /* same data as NEIGH_VAR_BASE_REACHABLE_TIME */ /* Following are used by "default" only */ NEIGH_VAR_GC_INTERVAL, NEIGH_VAR_GC_THRESH1, NEIGH_VAR_GC_THRESH2, NEIGH_VAR_GC_THRESH3, NEIGH_VAR_MAX }; struct neigh_parms { possible_net_t net; struct net_device *dev; struct list_head list; int (*neigh_setup)(struct neighbour *); struct neigh_table *tbl; void *sysctl_table; int dead; refcount_t refcnt; struct rcu_head rcu_head; int reachable_time; int data[NEIGH_VAR_DATA_MAX]; DECLARE_BITMAP(data_state, NEIGH_VAR_DATA_MAX); }; static inline void neigh_var_set(struct neigh_parms *p, int index, int val) { set_bit(index, p->data_state); p->data[index] = val; } #define NEIGH_VAR(p, attr) ((p)->data[NEIGH_VAR_ ## attr]) /* In ndo_neigh_setup, NEIGH_VAR_INIT should be used. * In other cases, NEIGH_VAR_SET should be used. */ #define NEIGH_VAR_INIT(p, attr, val) (NEIGH_VAR(p, attr) = val) #define NEIGH_VAR_SET(p, attr, val) neigh_var_set(p, NEIGH_VAR_ ## attr, val) static inline void neigh_parms_data_state_setall(struct neigh_parms *p) { bitmap_fill(p->data_state, NEIGH_VAR_DATA_MAX); } static inline void neigh_parms_data_state_cleanall(struct neigh_parms *p) { bitmap_zero(p->data_state, NEIGH_VAR_DATA_MAX); } struct neigh_statistics { unsigned long allocs; /* number of allocated neighs */ unsigned long destroys; /* number of destroyed neighs */ unsigned long hash_grows; /* number of hash resizes */ unsigned long res_failed; /* number of failed resolutions */ unsigned long lookups; /* number of lookups */ unsigned long hits; /* number of hits (among lookups) */ unsigned long rcv_probes_mcast; /* number of received mcast ipv6 */ unsigned long rcv_probes_ucast; /* number of received ucast ipv6 */ unsigned long periodic_gc_runs; /* number of periodic GC runs */ unsigned long forced_gc_runs; /* number of forced GC runs */ unsigned long unres_discards; /* number of unresolved drops */ unsigned long table_fulls; /* times even gc couldn't help */ }; #define NEIGH_CACHE_STAT_INC(tbl, field) this_cpu_inc((tbl)->stats->field) struct neighbour { struct neighbour __rcu *next; struct neigh_table *tbl; struct neigh_parms *parms; unsigned long confirmed; unsigned long updated; rwlock_t lock; refcount_t refcnt; unsigned int arp_queue_len_bytes; struct sk_buff_head arp_queue; struct timer_list timer; unsigned long used; atomic_t probes; __u8 flags; __u8 nud_state; __u8 type; __u8 dead; u8 protocol; seqlock_t ha_lock; unsigned char ha[ALIGN(MAX_ADDR_LEN, sizeof(unsigned long))] __aligned(8); struct hh_cache hh; int (*output)(struct neighbour *, struct sk_buff *); const struct neigh_ops *ops; struct list_head gc_list; struct rcu_head rcu; struct net_device *dev; u8 primary_key[0]; } __randomize_layout; struct neigh_ops { int family; void (*solicit)(struct neighbour *, struct sk_buff *); void (*error_report)(struct neighbour *, struct sk_buff *); int (*output)(struct neighbour *, struct sk_buff *); int (*connected_output)(struct neighbour *, struct sk_buff *); }; struct pneigh_entry { struct pneigh_entry *next; possible_net_t net; struct net_device *dev; u8 flags; u8 protocol; u8 key[]; }; /* * neighbour table manipulation */ #define NEIGH_NUM_HASH_RND 4 struct neigh_hash_table { struct neighbour __rcu **hash_buckets; unsigned int hash_shift; __u32 hash_rnd[NEIGH_NUM_HASH_RND]; struct rcu_head rcu; }; struct neigh_table { int family; unsigned int entry_size; unsigned int key_len; __be16 protocol; __u32 (*hash)(const void *pkey, const struct net_device *dev, __u32 *hash_rnd); bool (*key_eq)(const struct neighbour *, const void *pkey); int (*constructor)(struct neighbour *); int (*pconstructor)(struct pneigh_entry *); void (*pdestructor)(struct pneigh_entry *); void (*proxy_redo)(struct sk_buff *skb); int (*is_multicast)(const void *pkey); bool (*allow_add)(const struct net_device *dev, struct netlink_ext_ack *extack); char *id; struct neigh_parms parms; struct list_head parms_list; int gc_interval; int gc_thresh1; int gc_thresh2; int gc_thresh3; unsigned long last_flush; struct delayed_work gc_work; struct timer_list proxy_timer; struct sk_buff_head proxy_queue; atomic_t entries; atomic_t gc_entries; struct list_head gc_list; rwlock_t lock; unsigned long last_rand; struct neigh_statistics __percpu *stats; struct neigh_hash_table __rcu *nht; struct pneigh_entry **phash_buckets; }; enum { NEIGH_ARP_TABLE = 0, NEIGH_ND_TABLE = 1, NEIGH_DN_TABLE = 2, NEIGH_NR_TABLES, NEIGH_LINK_TABLE = NEIGH_NR_TABLES /* Pseudo table for neigh_xmit */ }; static inline int neigh_parms_family(struct neigh_parms *p) { return p->tbl->family; } #define NEIGH_PRIV_ALIGN sizeof(long long) #define NEIGH_ENTRY_SIZE(size) ALIGN((size), NEIGH_PRIV_ALIGN) static inline void *neighbour_priv(const struct neighbour *n) { return (char *)n + n->tbl->entry_size; } /* flags for neigh_update() */ #define NEIGH_UPDATE_F_OVERRIDE 0x00000001 #define NEIGH_UPDATE_F_WEAK_OVERRIDE 0x00000002 #define NEIGH_UPDATE_F_OVERRIDE_ISROUTER 0x00000004 #define NEIGH_UPDATE_F_USE 0x10000000 #define NEIGH_UPDATE_F_EXT_LEARNED 0x20000000 #define NEIGH_UPDATE_F_ISROUTER 0x40000000 #define NEIGH_UPDATE_F_ADMIN 0x80000000 extern const struct nla_policy nda_policy[]; static inline bool neigh_key_eq16(const struct neighbour *n, const void *pkey) { return *(const u16 *)n->primary_key == *(const u16 *)pkey; } static inline bool neigh_key_eq32(const struct neighbour *n, const void *pkey) { return *(const u32 *)n->primary_key == *(const u32 *)pkey; } static inline bool neigh_key_eq128(const struct neighbour *n, const void *pkey) { const u32 *n32 = (const u32 *)n->primary_key; const u32 *p32 = pkey; return ((n32[0] ^ p32[0]) | (n32[1] ^ p32[1]) | (n32[2] ^ p32[2]) | (n32[3] ^ p32[3])) == 0; } static inline struct neighbour *___neigh_lookup_noref( struct neigh_table *tbl, bool (*key_eq)(const struct neighbour *n, const void *pkey), __u32 (*hash)(const void *pkey, const struct net_device *dev, __u32 *hash_rnd), const void *pkey, struct net_device *dev) { struct neigh_hash_table *nht = rcu_dereference_bh(tbl->nht); struct neighbour *n; u32 hash_val; hash_val = hash(pkey, dev, nht->hash_rnd) >> (32 - nht->hash_shift); for (n = rcu_dereference_bh(nht->hash_buckets[hash_val]); n != NULL; n = rcu_dereference_bh(n->next)) { if (n->dev == dev && key_eq(n, pkey)) return n; } return NULL; } static inline struct neighbour *__neigh_lookup_noref(struct neigh_table *tbl, const void *pkey, struct net_device *dev) { return ___neigh_lookup_noref(tbl, tbl->key_eq, tbl->hash, pkey, dev); } void neigh_table_init(int index, struct neigh_table *tbl); int neigh_table_clear(int index, struct neigh_table *tbl); struct neighbour *neigh_lookup(struct neigh_table *tbl, const void *pkey, struct net_device *dev); struct neighbour *neigh_lookup_nodev(struct neigh_table *tbl, struct net *net, const void *pkey); struct neighbour *__neigh_create(struct neigh_table *tbl, const void *pkey, struct net_device *dev, bool want_ref); static inline struct neighbour *neigh_create(struct neigh_table *tbl, const void *pkey, struct net_device *dev) { return __neigh_create(tbl, pkey, dev, true); } void neigh_destroy(struct neighbour *neigh); int __neigh_event_send(struct neighbour *neigh, struct sk_buff *skb); int neigh_update(struct neighbour *neigh, const u8 *lladdr, u8 new, u32 flags, u32 nlmsg_pid); void __neigh_set_probe_once(struct neighbour *neigh); bool neigh_remove_one(struct neighbour *ndel, struct neigh_table *tbl); void neigh_changeaddr(struct neigh_table *tbl, struct net_device *dev); int neigh_ifdown(struct neigh_table *tbl, struct net_device *dev); int neigh_carrier_down(struct neigh_table *tbl, struct net_device *dev); int neigh_resolve_output(struct neighbour *neigh, struct sk_buff *skb); int neigh_connected_output(struct neighbour *neigh, struct sk_buff *skb); int neigh_direct_output(struct neighbour *neigh, struct sk_buff *skb); struct neighbour *neigh_event_ns(struct neigh_table *tbl, u8 *lladdr, void *saddr, struct net_device *dev); struct neigh_parms *neigh_parms_alloc(struct net_device *dev, struct neigh_table *tbl); void neigh_parms_release(struct neigh_table *tbl, struct neigh_parms *parms); static inline struct net *neigh_parms_net(const struct neigh_parms *parms) { return read_pnet(&parms->net); } unsigned long neigh_rand_reach_time(unsigned long base); void pneigh_enqueue(struct neigh_table *tbl, struct neigh_parms *p, struct sk_buff *skb); struct pneigh_entry *pneigh_lookup(struct neigh_table *tbl, struct net *net, const void *key, struct net_device *dev, int creat); struct pneigh_entry *__pneigh_lookup(struct neigh_table *tbl, struct net *net, const void *key, struct net_device *dev); int pneigh_delete(struct neigh_table *tbl, struct net *net, const void *key, struct net_device *dev); static inline struct net *pneigh_net(const struct pneigh_entry *pneigh) { return read_pnet(&pneigh->net); } void neigh_app_ns(struct neighbour *n); void neigh_for_each(struct neigh_table *tbl, void (*cb)(struct neighbour *, void *), void *cookie); void __neigh_for_each_release(struct neigh_table *tbl, int (*cb)(struct neighbour *)); int neigh_xmit(int fam, struct net_device *, const void *, struct sk_buff *); void pneigh_for_each(struct neigh_table *tbl, void (*cb)(struct pneigh_entry *)); struct neigh_seq_state { struct seq_net_private p; struct neigh_table *tbl; struct neigh_hash_table *nht; void *(*neigh_sub_iter)(struct neigh_seq_state *state, struct neighbour *n, loff_t *pos); unsigned int bucket; unsigned int flags; #define NEIGH_SEQ_NEIGH_ONLY 0x00000001 #define NEIGH_SEQ_IS_PNEIGH 0x00000002 #define NEIGH_SEQ_SKIP_NOARP 0x00000004 }; void *neigh_seq_start(struct seq_file *, loff_t *, struct neigh_table *, unsigned int); void *neigh_seq_next(struct seq_file *, void *, loff_t *); void neigh_seq_stop(struct seq_file *, void *); int neigh_proc_dointvec(struct ctl_table *ctl, int write, void *buffer, size_t *lenp, loff_t *ppos); int neigh_proc_dointvec_jiffies(struct ctl_table *ctl, int write, void *buffer, size_t *lenp, loff_t *ppos); int neigh_proc_dointvec_ms_jiffies(struct ctl_table *ctl, int write, void *buffer, size_t *lenp, loff_t *ppos); int neigh_sysctl_register(struct net_device *dev, struct neigh_parms *p, proc_handler *proc_handler); void neigh_sysctl_unregister(struct neigh_parms *p); static inline void __neigh_parms_put(struct neigh_parms *parms) { refcount_dec(&parms->refcnt); } static inline struct neigh_parms *neigh_parms_clone(struct neigh_parms *parms) { refcount_inc(&parms->refcnt); return parms; } /* * Neighbour references */ static inline void neigh_release(struct neighbour *neigh) { if (refcount_dec_and_test(&neigh->refcnt)) neigh_destroy(neigh); } static inline struct neighbour * neigh_clone(struct neighbour *neigh) { if (neigh) refcount_inc(&neigh->refcnt); return neigh; } #define neigh_hold(n) refcount_inc(&(n)->refcnt) static inline int neigh_event_send(struct neighbour *neigh, struct sk_buff *skb) { unsigned long now = jiffies; if (READ_ONCE(neigh->used) != now) WRITE_ONCE(neigh->used, now); if (!(neigh->nud_state&(NUD_CONNECTED|NUD_DELAY|NUD_PROBE))) return __neigh_event_send(neigh, skb); return 0; } #if IS_ENABLED(CONFIG_BRIDGE_NETFILTER) static inline int neigh_hh_bridge(struct hh_cache *hh, struct sk_buff *skb) { unsigned int seq, hh_alen; do { seq = read_seqbegin(&hh->hh_lock); hh_alen = HH_DATA_ALIGN(ETH_HLEN); memcpy(skb->data - hh_alen, hh->hh_data, ETH_ALEN + hh_alen - ETH_HLEN); } while (read_seqretry(&hh->hh_lock, seq)); return 0; } #endif static inline int neigh_hh_output(const struct hh_cache *hh, struct sk_buff *skb) { unsigned int hh_alen = 0; unsigned int seq; unsigned int hh_len; do { seq = read_seqbegin(&hh->hh_lock); hh_len = READ_ONCE(hh->hh_len); if (likely(hh_len <= HH_DATA_MOD)) { hh_alen = HH_DATA_MOD; /* skb_push() would proceed silently if we have room for * the unaligned size but not for the aligned size: * check headroom explicitly. */ if (likely(skb_headroom(skb) >= HH_DATA_MOD)) { /* this is inlined by gcc */ memcpy(skb->data - HH_DATA_MOD, hh->hh_data, HH_DATA_MOD); } } else { hh_alen = HH_DATA_ALIGN(hh_len); if (likely(skb_headroom(skb) >= hh_alen)) { memcpy(skb->data - hh_alen, hh->hh_data, hh_alen); } } } while (read_seqretry(&hh->hh_lock, seq)); if (WARN_ON_ONCE(skb_headroom(skb) < hh_alen)) { kfree_skb(skb); return NET_XMIT_DROP; } __skb_push(skb, hh_len); return dev_queue_xmit(skb); } static inline int neigh_output(struct neighbour *n, struct sk_buff *skb, bool skip_cache) { const struct hh_cache *hh = &n->hh; /* n->nud_state and hh->hh_len could be changed under us. * neigh_hh_output() is taking care of the race later. */ if (!skip_cache && (READ_ONCE(n->nud_state) & NUD_CONNECTED) && READ_ONCE(hh->hh_len)) return neigh_hh_output(hh, skb); return n->output(n, skb); } static inline struct neighbour * __neigh_lookup(struct neigh_table *tbl, const void *pkey, struct net_device *dev, int creat) { struct neighbour *n = neigh_lookup(tbl, pkey, dev); if (n || !creat) return n; n = neigh_create(tbl, pkey, dev); return IS_ERR(n) ? NULL : n; } static inline struct neighbour * __neigh_lookup_errno(struct neigh_table *tbl, const void *pkey, struct net_device *dev) { struct neighbour *n = neigh_lookup(tbl, pkey, dev); if (n) return n; return neigh_create(tbl, pkey, dev); } struct neighbour_cb { unsigned long sched_next; unsigned int flags; }; #define LOCALLY_ENQUEUED 0x1 #define NEIGH_CB(skb) ((struct neighbour_cb *)(skb)->cb) static inline void neigh_ha_snapshot(char *dst, const struct neighbour *n, const struct net_device *dev) { unsigned int seq; do { seq = read_seqbegin(&n->ha_lock); memcpy(dst, n->ha, dev->addr_len); } while (read_seqretry(&n->ha_lock, seq)); } static inline void neigh_update_is_router(struct neighbour *neigh, u32 flags, int *notify) { u8 ndm_flags = 0; ndm_flags |= (flags & NEIGH_UPDATE_F_ISROUTER) ? NTF_ROUTER : 0; if ((neigh->flags ^ ndm_flags) & NTF_ROUTER) { if (ndm_flags & NTF_ROUTER) neigh->flags |= NTF_ROUTER; else neigh->flags &= ~NTF_ROUTER; *notify = 1; } } #endif
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 /* SPDX-License-Identifier: GPL-2.0 */ /* linux/net/inet/arp.h */ #ifndef _ARP_H #define _ARP_H #include <linux/if_arp.h> #include <linux/hash.h> #include <net/neighbour.h> extern struct neigh_table arp_tbl; static inline u32 arp_hashfn(const void *pkey, const struct net_device *dev, u32 *hash_rnd) { u32 key = *(const u32 *)pkey; u32 val = key ^ hash32_ptr(dev); return val * hash_rnd[0]; } #ifdef CONFIG_INET static inline struct neighbour *__ipv4_neigh_lookup_noref(struct net_device *dev, u32 key) { if (dev->flags & (IFF_LOOPBACK | IFF_POINTOPOINT)) key = INADDR_ANY; return ___neigh_lookup_noref(&arp_tbl, neigh_key_eq32, arp_hashfn, &key, dev); } #else static inline struct neighbour *__ipv4_neigh_lookup_noref(struct net_device *dev, u32 key) { return NULL; } #endif static inline struct neighbour *__ipv4_neigh_lookup(struct net_device *dev, u32 key) { struct neighbour *n; rcu_read_lock_bh(); n = __ipv4_neigh_lookup_noref(dev, key); if (n && !refcount_inc_not_zero(&n->refcnt)) n = NULL; rcu_read_unlock_bh(); return n; } static inline void __ipv4_confirm_neigh(struct net_device *dev, u32 key) { struct neighbour *n; rcu_read_lock_bh(); n = __ipv4_neigh_lookup_noref(dev, key); if (n) { unsigned long now = jiffies; /* avoid dirtying neighbour */ if (READ_ONCE(n->confirmed) != now) WRITE_ONCE(n->confirmed, now); } rcu_read_unlock_bh(); } void arp_init(void); int arp_ioctl(struct net *net, unsigned int cmd, void __user *arg); void arp_send(int type, int ptype, __be32 dest_ip, struct net_device *dev, __be32 src_ip, const unsigned char *dest_hw, const unsigned char *src_hw, const unsigned char *th); int arp_mc_map(__be32 addr, u8 *haddr, struct net_device *dev, int dir); void arp_ifdown(struct net_device *dev); struct sk_buff *arp_create(int type, int ptype, __be32 dest_ip, struct net_device *dev, __be32 src_ip, const unsigned char *dest_hw, const unsigned char *src_hw, const unsigned char *target_hw); void arp_xmit(struct sk_buff *skb); #endif /* _ARP_H */
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 /* SPDX-License-Identifier: GPL-2.0-only */ /* * Copyright (c) 2020 Christoph Hellwig. * * Support for "universal" pointers that can point to either kernel or userspace * memory. */ #ifndef _LINUX_SOCKPTR_H #define _LINUX_SOCKPTR_H #include <linux/slab.h> #include <linux/uaccess.h> typedef struct { union { void *kernel; void __user *user; }; bool is_kernel : 1; } sockptr_t; static inline bool sockptr_is_kernel(sockptr_t sockptr) { return sockptr.is_kernel; } static inline sockptr_t KERNEL_SOCKPTR(void *p) { return (sockptr_t) { .kernel = p, .is_kernel = true }; } static inline sockptr_t USER_SOCKPTR(void __user *p) { return (sockptr_t) { .user = p }; } static inline bool sockptr_is_null(sockptr_t sockptr) { if (sockptr_is_kernel(sockptr)) return !sockptr.kernel; return !sockptr.user; } static inline int copy_from_sockptr_offset(void *dst, sockptr_t src, size_t offset, size_t size) { if (!sockptr_is_kernel(src)) return copy_from_user(dst, src.user + offset, size); memcpy(dst, src.kernel + offset, size); return 0; } static inline int copy_from_sockptr(void *dst, sockptr_t src, size_t size) { return copy_from_sockptr_offset(dst, src, 0, size); } static inline int copy_to_sockptr_offset(sockptr_t dst, size_t offset, const void *src, size_t size) { if (!sockptr_is_kernel(dst)) return copy_to_user(dst.user + offset, src, size); memcpy(dst.kernel + offset, src, size); return 0; } static inline void *memdup_sockptr(sockptr_t src, size_t len) { void *p = kmalloc_track_caller(len, GFP_USER | __GFP_NOWARN); if (!p) return ERR_PTR(-ENOMEM); if (copy_from_sockptr(p, src, len)) { kfree(p); return ERR_PTR(-EFAULT); } return p; } static inline void *memdup_sockptr_nul(sockptr_t src, size_t len) { char *p = kmalloc_track_caller(len + 1, GFP_KERNEL); if (!p) return ERR_PTR(-ENOMEM); if (copy_from_sockptr(p, src, len)) { kfree(p); return ERR_PTR(-EFAULT); } p[len] = '\0'; return p; } static inline long strncpy_from_sockptr(char *dst, sockptr_t src, size_t count) { if (sockptr_is_kernel(src)) { size_t len = min(strnlen(src.kernel, count - 1) + 1, count); memcpy(dst, src.kernel, len); return len; } return strncpy_from_user(dst, src.user, count); } #endif /* _LINUX_SOCKPTR_H */
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 /* SPDX-License-Identifier: GPL-2.0 */ #ifndef __CGROUP_INTERNAL_H #define __CGROUP_INTERNAL_H #include <linux/cgroup.h> #include <linux/kernfs.h> #include <linux/workqueue.h> #include <linux/list.h> #include <linux/refcount.h> #include <linux/fs_parser.h> #define TRACE_CGROUP_PATH_LEN 1024 extern spinlock_t trace_cgroup_path_lock; extern char trace_cgroup_path[TRACE_CGROUP_PATH_LEN]; extern bool cgroup_debug; extern void __init enable_debug_cgroup(void); /* * cgroup_path() takes a spin lock. It is good practice not to take * spin locks within trace point handlers, as they are mostly hidden * from normal view. As cgroup_path() can take the kernfs_rename_lock * spin lock, it is best to not call that function from the trace event * handler. * * Note: trace_cgroup_##type##_enabled() is a static branch that will only * be set when the trace event is enabled. */ #define TRACE_CGROUP_PATH(type, cgrp, ...) \ do { \ if (trace_cgroup_##type##_enabled()) { \ unsigned long flags; \ spin_lock_irqsave(&trace_cgroup_path_lock, \ flags); \ cgroup_path(cgrp, trace_cgroup_path, \ TRACE_CGROUP_PATH_LEN); \ trace_cgroup_##type(cgrp, trace_cgroup_path, \ ##__VA_ARGS__); \ spin_unlock_irqrestore(&trace_cgroup_path_lock, \ flags); \ } \ } while (0) /* * The cgroup filesystem superblock creation/mount context. */ struct cgroup_fs_context { struct kernfs_fs_context kfc; struct cgroup_root *root; struct cgroup_namespace *ns; unsigned int flags; /* CGRP_ROOT_* flags */ /* cgroup1 bits */ bool cpuset_clone_children; bool none; /* User explicitly requested empty subsystem */ bool all_ss; /* Seen 'all' option */ u16 subsys_mask; /* Selected subsystems */ char *name; /* Hierarchy name */ char *release_agent; /* Path for release notifications */ }; static inline struct cgroup_fs_context *cgroup_fc2context(struct fs_context *fc) { struct kernfs_fs_context *kfc = fc->fs_private; return container_of(kfc, struct cgroup_fs_context, kfc); } /* * A cgroup can be associated with multiple css_sets as different tasks may * belong to different cgroups on different hierarchies. In the other * direction, a css_set is naturally associated with multiple cgroups. * This M:N relationship is represented by the following link structure * which exists for each association and allows traversing the associations * from both sides. */ struct cgrp_cset_link { /* the cgroup and css_set this link associates */ struct cgroup *cgrp; struct css_set *cset; /* list of cgrp_cset_links anchored at cgrp->cset_links */ struct list_head cset_link; /* list of cgrp_cset_links anchored at css_set->cgrp_links */ struct list_head cgrp_link; }; /* used to track tasks and csets during migration */ struct cgroup_taskset { /* the src and dst cset list running through cset->mg_node */ struct list_head src_csets; struct list_head dst_csets; /* the number of tasks in the set */ int nr_tasks; /* the subsys currently being processed */ int ssid; /* * Fields for cgroup_taskset_*() iteration. * * Before migration is committed, the target migration tasks are on * ->mg_tasks of the csets on ->src_csets. After, on ->mg_tasks of * the csets on ->dst_csets. ->csets point to either ->src_csets * or ->dst_csets depending on whether migration is committed. * * ->cur_csets and ->cur_task point to the current task position * during iteration. */ struct list_head *csets; struct css_set *cur_cset; struct task_struct *cur_task; }; /* migration context also tracks preloading */ struct cgroup_mgctx { /* * Preloaded source and destination csets. Used to guarantee * atomic success or failure on actual migration. */ struct list_head preloaded_src_csets; struct list_head preloaded_dst_csets; /* tasks and csets to migrate */ struct cgroup_taskset tset; /* subsystems affected by migration */ u16 ss_mask; }; #define CGROUP_TASKSET_INIT(tset) \ { \ .src_csets = LIST_HEAD_INIT(tset.src_csets), \ .dst_csets = LIST_HEAD_INIT(tset.dst_csets), \ .csets = &tset.src_csets, \ } #define CGROUP_MGCTX_INIT(name) \ { \ LIST_HEAD_INIT(name.preloaded_src_csets), \ LIST_HEAD_INIT(name.preloaded_dst_csets), \ CGROUP_TASKSET_INIT(name.tset), \ } #define DEFINE_CGROUP_MGCTX(name) \ struct cgroup_mgctx name = CGROUP_MGCTX_INIT(name) extern struct mutex cgroup_mutex; extern spinlock_t css_set_lock; extern struct cgroup_subsys *cgroup_subsys[]; extern struct list_head cgroup_roots; extern struct file_system_type cgroup_fs_type; /* iterate across the hierarchies */ #define for_each_root(root) \ list_for_each_entry((root), &cgroup_roots, root_list) /** * for_each_subsys - iterate all enabled cgroup subsystems * @ss: the iteration cursor * @ssid: the index of @ss, CGROUP_SUBSYS_COUNT after reaching the end */ #define for_each_subsys(ss, ssid) \ for ((ssid) = 0; (ssid) < CGROUP_SUBSYS_COUNT && \ (((ss) = cgroup_subsys[ssid]) || true); (ssid)++) static inline bool cgroup_is_dead(const struct cgroup *cgrp) { return !(cgrp->self.flags & CSS_ONLINE); } static inline bool notify_on_release(const struct cgroup *cgrp) { return test_bit(CGRP_NOTIFY_ON_RELEASE, &cgrp->flags); } void put_css_set_locked(struct css_set *cset); static inline void put_css_set(struct css_set *cset) { unsigned long flags; /* * Ensure that the refcount doesn't hit zero while any readers * can see it. Similar to atomic_dec_and_lock(), but for an * rwlock */ if (refcount_dec_not_one(&cset->refcount)) return; spin_lock_irqsave(&css_set_lock, flags); put_css_set_locked(cset); spin_unlock_irqrestore(&css_set_lock, flags); } /* * refcounted get/put for css_set objects */ static inline void get_css_set(struct css_set *cset) { refcount_inc(&cset->refcount); } bool cgroup_ssid_enabled(int ssid); bool cgroup_on_dfl(const struct cgroup *cgrp); bool cgroup_is_thread_root(struct cgroup *cgrp); bool cgroup_is_threaded(struct cgroup *cgrp); struct cgroup_root *cgroup_root_from_kf(struct kernfs_root *kf_root); struct cgroup *task_cgroup_from_root(struct task_struct *task, struct cgroup_root *root); struct cgroup *cgroup_kn_lock_live(struct kernfs_node *kn, bool drain_offline); void cgroup_kn_unlock(struct kernfs_node *kn); int cgroup_path_ns_locked(struct cgroup *cgrp, char *buf, size_t buflen, struct cgroup_namespace *ns); void cgroup_free_root(struct cgroup_root *root); void init_cgroup_root(struct cgroup_fs_context *ctx); int cgroup_setup_root(struct cgroup_root *root, u16 ss_mask); int rebind_subsystems(struct cgroup_root *dst_root, u16 ss_mask); int cgroup_do_get_tree(struct fs_context *fc); int cgroup_migrate_vet_dst(struct cgroup *dst_cgrp); void cgroup_migrate_finish(struct cgroup_mgctx *mgctx); void cgroup_migrate_add_src(struct css_set *src_cset, struct cgroup *dst_cgrp, struct cgroup_mgctx *mgctx); int cgroup_migrate_prepare_dst(struct cgroup_mgctx *mgctx); int cgroup_migrate(struct task_struct *leader, bool threadgroup, struct cgroup_mgctx *mgctx); int cgroup_attach_task(struct cgroup *dst_cgrp, struct task_struct *leader, bool threadgroup); struct task_struct *cgroup_procs_write_start(char *buf, bool threadgroup, bool *locked) __acquires(&cgroup_threadgroup_rwsem); void cgroup_procs_write_finish(struct task_struct *task, bool locked) __releases(&cgroup_threadgroup_rwsem); void cgroup_lock_and_drain_offline(struct cgroup *cgrp); int cgroup_mkdir(struct kernfs_node *parent_kn, const char *name, umode_t mode); int cgroup_rmdir(struct kernfs_node *kn); int cgroup_show_path(struct seq_file *sf, struct kernfs_node *kf_node, struct kernfs_root *kf_root); int __cgroup_task_count(const struct cgroup *cgrp); int cgroup_task_count(const struct cgroup *cgrp); /* * rstat.c */ int cgroup_rstat_init(struct cgroup *cgrp); void cgroup_rstat_exit(struct cgroup *cgrp); void cgroup_rstat_boot(void); void cgroup_base_stat_cputime_show(struct seq_file *seq); /* * namespace.c */ extern const struct proc_ns_operations cgroupns_operations; /* * cgroup-v1.c */ extern struct cftype cgroup1_base_files[]; extern struct kernfs_syscall_ops cgroup1_kf_syscall_ops; extern const struct fs_parameter_spec cgroup1_fs_parameters[]; int proc_cgroupstats_show(struct seq_file *m, void *v); bool cgroup1_ssid_disabled(int ssid); void cgroup1_pidlist_destroy_all(struct cgroup *cgrp); void cgroup1_release_agent(struct work_struct *work); void cgroup1_check_for_release(struct cgroup *cgrp); int cgroup1_parse_param(struct fs_context *fc, struct fs_parameter *param); int cgroup1_get_tree(struct fs_context *fc); int cgroup1_reconfigure(struct fs_context *ctx); #endif /* __CGROUP_INTERNAL_H */
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946 947 948 949 950 951 952 953 954 955 956 957 958 959 960 961 962 963 964 965 966 967 968 969 970 971 972 973 974 975 976 977 978 979 980 981 982 983 984 985 986 987 988 989 990 991 992 993 994 995 996 997 998 999 1000 1001 1002 1003 1004 1005 1006 1007 1008 1009 1010 1011 1012 1013 1014 1015 1016 1017 1018 1019 1020 1021 1022 1023 1024 1025 1026 1027 1028 1029 1030 1031 1032 1033 1034 1035 1036 1037 1038 1039 1040 1041 1042 1043 1044 1045 1046 1047 1048 1049 1050 1051 1052 1053 1054 1055 1056 1057 1058 1059 1060 1061 1062 1063 1064 1065 1066 1067 1068 1069 1070 1071 1072 1073 1074 1075 1076 1077 1078 1079 1080 1081 1082 1083 1084 1085 1086 1087 1088 1089 1090 1091 1092 1093 1094 1095 1096 1097 1098 1099 1100 1101 1102 1103 1104 1105 1106 1107 1108 1109 1110 1111 1112 1113 1114 1115 1116 1117 1118 1119 1120 1121 1122 1123 1124 1125 1126 1127 1128 1129 1130 1131 1132 1133 1134 1135 1136 1137 1138 1139 1140 1141 1142 1143 1144 1145 1146 1147 1148 1149 1150 1151 1152 1153 1154 1155 1156 1157 1158 1159 1160 1161 1162 1163 1164 1165 1166 1167 1168 1169 1170 1171 1172 1173 1174 1175 1176 1177 1178 1179 1180 1181 1182 1183 1184 1185 1186 1187 1188 1189 1190 1191 1192 1193 1194 1195 1196 1197 1198 1199 1200 1201 1202 1203 1204 1205 1206 1207 1208 1209 1210 1211 1212 1213 1214 1215 1216 1217 1218 1219 1220 1221 1222 1223 1224 1225 1226 1227 1228 1229 1230 1231 1232 1233 1234 1235 1236 1237 1238 1239 1240 1241 1242 1243 1244 1245 1246 1247 1248 1249 1250 1251 1252 1253 1254 1255 1256 1257 1258 1259 1260 1261 1262 1263 1264 1265 1266 1267 1268 1269 1270 1271 1272 1273 1274 1275 1276 1277 1278 1279 1280 1281 1282 1283 1284 1285 1286 1287 1288 1289 1290 1291 1292 1293 1294 1295 1296 1297 1298 1299 1300 1301 1302 1303 1304 1305 1306 1307 1308 1309 1310 1311 1312 1313 1314 1315 1316 1317 1318 1319 1320 1321 1322 1323 1324 1325 1326 1327 1328 1329 1330 1331 1332 1333 1334 1335 1336 1337 1338 1339 1340 1341 1342 1343 1344 1345 1346 1347 1348 1349 1350 1351 1352 1353 1354 1355 1356 1357 1358 1359 1360 1361 1362 1363 1364 1365 1366 1367 1368 1369 1370 1371 1372 1373 1374 1375 1376 1377 1378 1379 1380 1381 1382 1383 1384 1385 1386 1387 1388 1389 1390 1391 1392 1393 1394 1395 1396 1397 1398 1399 1400 1401 1402 1403 1404 1405 1406 1407 1408 1409 1410 1411 1412 1413 1414 1415 1416 1417 1418 1419 1420 1421 1422 1423 1424 1425 1426 1427 1428 1429 1430 1431 1432 1433 1434 1435 1436 1437 1438 1439 1440 1441 1442 1443 1444 1445 1446 1447 1448 1449 1450 1451 1452 1453 1454 1455 1456 1457 1458 1459 1460 1461 1462 1463 1464 1465 1466 1467 1468 1469 1470 1471 1472 1473 1474 1475 1476 1477 1478 1479 1480 1481 1482 1483 1484 1485 1486 1487 1488 1489 1490 1491 1492 1493 1494 1495 1496 1497 1498 1499 1500 1501 1502 1503 1504 1505 1506 1507 1508 1509 1510 1511 1512 1513 1514 1515 1516 1517 1518 1519 1520 1521 1522 1523 1524 1525 1526 1527 1528 1529 1530 1531 1532 1533 1534 1535 1536 1537 1538 1539 1540 1541 1542 1543 1544 1545 1546 1547 1548 1549 1550 1551 1552 1553 1554 1555 1556 1557 1558 1559 1560 1561 1562 1563 1564 1565 1566 1567 1568 1569 1570 1571 1572 1573 1574 1575 1576 1577 1578 1579 1580 1581 1582 1583 1584 1585 1586 1587 1588 1589 1590 1591 1592 1593 1594 1595 1596 1597 1598 1599 1600 1601 1602 1603 1604 1605 1606 1607 1608 1609 1610 1611 1612 1613 1614 1615 1616 1617 1618 1619 1620 1621 1622 1623 1624 1625 1626 1627 1628 1629 1630 1631 1632 1633 1634 1635 1636 1637 1638 1639 1640 1641 1642 1643 1644 1645 1646 1647 1648 1649 1650 1651 1652 1653 1654 1655 1656 1657 1658 1659 1660 1661 1662 1663 1664 1665 1666 1667 1668 1669 1670 1671 1672 1673 1674 1675 1676 1677 1678 1679 1680 1681 1682 1683 1684 1685 1686 1687 1688 1689 1690 1691 1692 1693 1694 1695 1696 1697 1698 1699 1700 1701 1702 1703 1704 1705 1706 1707 1708 1709 1710 1711 1712 1713 1714 1715 1716 1717 1718 1719 1720 1721 1722 1723 1724 1725 1726 1727 1728 1729 1730 1731 1732 1733 1734 1735 1736 1737 1738 1739 1740 1741 1742 1743 1744 1745 1746 1747 1748 1749 1750 1751 1752 1753 1754 1755 1756 1757 1758 1759 1760 1761 1762 1763 1764 1765 1766 1767 1768 1769 1770 1771 1772 1773 1774 1775 1776 1777 1778 1779 1780 1781 1782 1783 1784 1785 1786 1787 1788 1789 1790 1791 1792 1793 1794 1795 1796 1797 1798 1799 1800 1801 1802 1803 1804 1805 1806 1807 1808 1809 1810 1811 1812 1813 1814 1815 1816 1817 1818 1819 1820 1821 1822 1823 1824 1825 1826 1827 1828 1829 1830 1831 1832 1833 1834 1835 1836 1837 1838 1839 1840 1841 1842 1843 1844 1845 1846 1847 1848 1849 1850 1851 1852 1853 1854 1855 1856 1857 1858 1859 1860 1861 1862 1863 1864 1865 1866 1867 1868 1869 1870 1871 1872 1873 1874 1875 1876 1877 1878 1879 1880 1881 1882 1883 1884 1885 1886 1887 1888 1889 1890 1891 1892 1893 1894 1895 1896 1897 1898 1899 1900 1901 1902 1903 1904 1905 1906 1907 1908 1909 1910 1911 1912 1913 1914 1915 1916 1917 1918 1919 1920 1921 1922 1923 1924 1925 1926 1927 1928 1929 1930 1931 1932 1933 1934 1935 1936 1937 1938 1939 1940 1941 1942 1943 1944 1945 1946 1947 1948 1949 1950 1951 1952 1953 1954 1955 1956 1957 1958 1959 1960 1961 1962 1963 1964 1965 1966 1967 1968 1969 1970 1971 1972 1973 1974 1975 1976 1977 1978 1979 1980 1981 1982 1983 1984 1985 1986 1987 1988 1989 1990 1991 1992 1993 1994 1995 1996 1997 1998 1999 2000 2001 2002 2003 2004 2005 2006 2007 2008 2009 2010 2011 2012 2013 2014 2015 2016 2017 2018 2019 2020 2021 2022 2023 2024 2025 2026 2027 2028 2029 2030 2031 2032 2033 2034 2035 2036 2037 2038 2039 2040 2041 2042 2043 2044 2045 2046 2047 2048 2049 2050 2051 2052 2053 2054 2055 2056 2057 2058 2059 2060 2061 2062 2063 2064 2065 2066 2067 2068 2069 2070 2071 2072 2073 2074 2075 2076 2077 2078 2079 2080 2081 2082 2083 2084 2085 2086 2087 2088 2089 2090 2091 2092 2093 2094 2095 2096 2097 2098 2099 2100 2101 2102 2103 2104 2105 2106 2107 2108 2109 2110 2111 2112 2113 2114 2115 2116 2117 2118 2119 2120 2121 2122 2123 2124 2125 2126 2127 2128 2129 2130 2131 2132 2133 2134 2135 2136 2137 2138 2139 2140 2141 2142 2143 2144 2145 2146 2147 2148 2149 2150 2151 2152 2153 2154 2155 2156 2157 2158 2159 2160 2161 2162 2163 2164 2165 2166 2167 2168 2169 2170 2171 2172 2173 2174 2175 2176 2177 2178 2179 2180 2181 2182 2183 2184 2185 2186 2187 2188 2189 2190 2191 2192 2193 2194 2195 2196 2197 2198 2199 2200 2201 2202 2203 2204 2205 2206 2207 2208 2209 2210 2211 2212 2213 2214 2215 2216 2217 2218 2219 2220 2221 2222 2223 2224 2225 2226 2227 2228 2229 2230 2231 2232 2233 2234 2235 2236 2237 2238 2239 2240 2241 2242 2243 2244 2245 2246 2247 2248 2249 2250 2251 2252 2253 2254 2255 2256 2257 2258 2259 2260 2261 2262 2263 2264 2265 2266 2267 2268 2269 2270 2271 2272 2273 2274 2275 2276 2277 2278 2279 2280 2281 2282 2283 2284 2285 2286 2287 2288 2289 2290 2291 2292 2293 2294 2295 2296 2297 2298 2299 2300 2301 2302 2303 2304 2305 2306 2307 2308 2309 2310 2311 2312 2313 2314 2315 2316 2317 2318 2319 2320 2321 2322 2323 2324 2325 2326 2327 2328 2329 2330 2331 2332 2333 2334 2335 2336 2337 2338 2339 2340 2341 2342 2343 2344 2345 2346 2347 2348 2349 2350 2351 2352 2353 2354 2355 2356 2357 2358 2359 2360 2361 2362 2363 2364 2365 2366 2367 2368 2369 2370 2371 2372 2373 2374 2375 2376 2377 2378 2379 2380 2381 2382 2383 2384 2385 2386 2387 2388 2389 2390 2391 2392 2393 2394 2395 2396 2397 2398 2399 2400 2401 2402 2403 2404 2405 2406 2407 2408 2409 2410 2411 2412 2413 2414 2415 2416 2417 2418 2419 2420 2421 2422 2423 2424 2425 2426 2427 2428 2429 2430 2431 2432 2433 2434 2435 2436 2437 2438 2439 2440 2441 2442 2443 2444 2445 2446 2447 2448 2449 2450 2451 2452 2453 2454 2455 2456 2457 2458 2459 2460 2461 2462 2463 2464 2465 2466 2467 2468 2469 2470 2471 2472 2473 2474 2475 2476 2477 2478 2479 2480 2481 2482 2483 2484 2485 2486 2487 2488 2489 2490 2491 2492 2493 2494 2495 2496 2497 2498 2499 2500 2501 2502 2503 2504 2505 2506 2507 2508 2509 2510 2511 2512 2513 2514 2515 2516 2517 2518 2519 2520 2521 2522 2523 2524 2525 2526 2527 2528 2529 2530 2531 2532 2533 2534 2535 2536 2537 2538 2539 2540 2541 2542 2543 2544 2545 2546 2547 2548 2549 2550 2551 2552 2553 2554 2555 2556 2557 2558 2559 2560 2561 2562 2563 2564 2565 2566 2567 2568 2569 2570 2571 2572 2573 2574 2575 2576 2577 2578 2579 2580 2581 2582 2583 2584 2585 2586 2587 2588 2589 2590 2591 2592 2593 2594 2595 2596 2597 2598 2599 2600 2601 2602 2603 2604 2605 2606 2607 2608 2609 2610 2611 2612 2613 2614 2615 2616 2617 2618 2619 2620 2621 2622 2623 2624 2625 2626 2627 2628 2629 2630 2631 2632 2633 2634 2635 2636 2637 2638 2639 2640 2641 2642 2643 2644 2645 2646 2647 2648 2649 2650 2651 2652 2653 2654 2655 2656 2657 2658 2659 2660 2661 2662 2663 2664 2665 2666 2667 2668 2669 2670 2671 2672 2673 2674 2675 2676 2677 2678 2679 2680 2681 2682 2683 2684 2685 2686 2687 2688 2689 2690 2691 2692 2693 2694 2695 2696 2697 2698 2699 2700 2701 2702 2703 2704 2705 2706 2707 2708 2709 2710 2711 2712 2713 2714 2715 2716 2717 2718 2719 2720 2721 2722 2723 2724 2725 2726 2727 2728 2729 2730 2731 2732 2733 2734 2735 2736 2737 2738 2739 2740 2741 2742 2743 2744 2745 2746 2747 2748 2749 2750 2751 2752 2753 2754 2755 2756 2757 2758 2759 2760 2761 2762 2763 2764 2765 2766 2767 2768 2769 2770 2771 2772 2773 2774 2775 2776 2777 2778 2779 2780 2781 2782 2783 2784 2785 2786 2787 2788 2789 2790 2791 2792 2793 2794 2795 2796 2797 2798 2799 2800 2801 2802 2803 2804 2805 2806 2807 2808 2809 2810 2811 2812 2813 2814 2815 2816 2817 2818 2819 2820 2821 2822 2823 2824 2825 2826 2827 2828 2829 2830 2831 2832 2833 2834 2835 2836 2837 2838 2839 2840 2841 2842 2843 2844 2845 2846 2847 2848 2849 2850 2851 2852 2853 2854 2855 2856 2857 2858 2859 2860 2861 2862 2863 2864 2865 2866 2867 2868 2869 2870 2871 2872 2873 2874 2875 2876 2877 2878 2879 2880 2881 2882 2883 2884 2885 2886 2887 2888 2889 2890 2891 2892 2893 2894 2895 2896 2897 2898 2899 2900 2901 2902 2903 2904 2905 2906 2907 2908 2909 2910 2911 2912 2913 2914 2915 2916 2917 2918 2919 2920 2921 2922 2923 2924 2925 2926 2927 2928 2929 2930 2931 2932 2933 2934 2935 2936 2937 2938 2939 2940 2941 2942 2943 2944 2945 2946 2947 2948 2949 2950 2951 2952 2953 2954 2955 2956 2957 2958 2959 2960 2961 2962 2963 2964 2965 2966 2967 2968 2969 2970 2971 2972 2973 2974 2975 2976 2977 2978 2979 2980 2981 2982 2983 2984 2985 2986 2987 2988 2989 2990 2991 2992 2993 2994 2995 2996 2997 2998 2999 3000 3001 3002 3003 3004 3005 3006 3007 3008 3009 3010 3011 3012 3013 3014 3015 3016 3017 3018 3019 3020 3021 3022 3023 3024 3025 3026 3027 3028 3029 3030 3031 3032 3033 3034 3035 3036 3037 3038 3039 3040 3041 3042 3043 3044 3045 3046 3047 3048 3049 3050 3051 3052 3053 3054 3055 3056 3057 3058 3059 3060 3061 3062 3063 3064 3065 3066 3067 3068 3069 3070 3071 3072 3073 3074 3075 3076 3077 3078 3079 3080 3081 3082 3083 3084 3085 3086 3087 3088 3089 3090 3091 3092 3093 3094 3095 3096 3097 3098 3099 3100 3101 3102 3103 3104 3105 3106 3107 3108 3109 3110 3111 3112 3113 3114 3115 3116 3117 3118 3119 3120 3121 3122 3123 3124 3125 3126 3127 3128 3129 3130 3131 3132 3133 3134 3135 3136 3137 3138 3139 3140 3141 3142 3143 3144 3145 3146 3147 3148 3149 3150 3151 3152 3153 3154 3155 3156 3157 3158 3159 3160 3161 3162 3163 3164 3165 3166 3167 3168 3169 3170 3171 3172 3173 3174 3175 3176 3177 3178 3179 3180 3181 3182 3183 3184 3185 3186 3187 3188 3189 3190 3191 3192 3193 3194 3195 3196 3197 3198 3199 3200 3201 3202 3203 3204 3205 3206 3207 3208 3209 3210 3211 3212 3213 3214 3215 3216 3217 3218 3219 3220 3221 3222 3223 3224 3225 3226 3227 3228 3229 3230 3231 3232 3233 3234 3235 3236 3237 3238 3239 3240 3241 3242 3243 3244 3245 3246 3247 3248 3249 3250 3251 3252 3253 3254 3255 3256 3257 3258 3259 3260 3261 3262 3263 3264 3265 3266 3267 3268 3269 3270 3271 3272 3273 3274 3275 3276 3277 3278 3279 3280 3281 3282 3283 3284 3285 3286 3287 3288 3289 3290 3291 3292 3293 3294 3295 3296 3297 3298 3299 3300 3301 3302 3303 3304 3305 3306 3307 3308 3309 3310 3311 3312 3313 3314 3315 3316 3317 3318 3319 3320 3321 3322 3323 3324 3325 3326 3327 3328 3329 3330 3331 3332 3333 3334 3335 3336 3337 3338 3339 3340 3341 3342 3343 3344 3345 3346 3347 3348 3349 3350 3351 3352 3353 3354 3355 3356 3357 3358 3359 3360 3361 3362 3363 3364 3365 3366 3367 3368 3369 3370 3371 3372 3373 3374 3375 3376 3377 3378 3379 3380 3381 3382 3383 3384 3385 3386 3387 3388 3389 3390 3391 3392 3393 3394 3395 3396 3397 3398 3399 3400 3401 3402 3403 3404 3405 3406 3407 3408 3409 3410 3411 3412 3413 3414 3415 3416 3417 3418 3419 3420 3421 3422 3423 3424 3425 3426 3427 3428 3429 3430 3431 3432 3433 3434 3435 3436 3437 3438 3439 3440 3441 3442 3443 3444 3445 3446 3447 3448 3449 3450 3451 3452 3453 3454 3455 3456 3457 3458 3459 3460 3461 3462 3463 3464 3465 3466 3467 3468 3469 3470 3471 3472 3473 3474 3475 3476 3477 3478 3479 3480 3481 3482 3483 3484 3485 3486 3487 3488 3489 3490 3491 3492 3493 3494 3495 3496 3497 3498 3499 3500 3501 3502 3503 3504 3505 3506 3507 3508 3509 3510 3511 3512 3513 3514 3515 3516 3517 3518 3519 3520 3521 3522 3523 3524 3525 3526 3527 3528 3529 3530 3531 3532 3533 3534 3535 3536 3537 3538 3539 3540 3541 3542 3543 3544 3545 3546 3547 3548 3549 3550 3551 3552 3553 3554 3555 3556 3557 3558 3559 3560 3561 3562 3563 3564 3565 3566 3567 3568 3569 3570 3571 3572 3573 3574 3575 3576 3577 3578 3579 3580 3581 3582 3583 3584 3585 3586 3587 3588 3589 3590 3591 3592 3593 3594 3595 3596 3597 3598 3599 3600 3601 3602 3603 3604 3605 3606 3607 3608 3609 3610 3611 3612 3613 3614 3615 3616 3617 3618 3619 3620 3621 3622 3623 3624 3625 3626 3627 3628 3629 3630 3631 3632 3633 3634 3635 3636 3637 3638 3639 3640 3641 3642 3643 3644 3645 3646 3647 3648 3649 3650 3651 3652 3653 3654 3655 3656 3657 3658 3659 3660 3661 3662 3663 3664 3665 3666 3667 3668 3669 3670 3671 3672 3673 3674 3675 3676 3677 3678 3679 3680 3681 3682 3683 3684 3685 3686 3687 3688 3689 3690 3691 3692 3693 3694 3695 3696 3697 3698 3699 3700 3701 3702 3703 3704 3705 3706 3707 3708 3709 3710 3711 3712 3713 3714 3715 3716 3717 3718 3719 3720 3721 3722 3723 3724 3725 3726 3727 3728 3729 3730 3731 3732 3733 3734 3735 3736 3737 3738 3739 3740 3741 3742 3743 3744 3745 3746 3747 3748 3749 3750 3751 3752 3753 3754 3755 3756 3757 3758 3759 3760 3761 3762 3763 3764 3765 3766 3767 3768 3769 3770 3771 3772 3773 3774 3775 3776 3777 3778 3779 3780 3781 3782 3783 3784 3785 3786 3787 3788 3789 3790 3791 3792 3793 3794 3795 3796 3797 3798 3799 3800 3801 3802 3803 3804 3805 3806 3807 3808 3809 3810 3811 3812 3813 3814 3815 3816 3817 3818 3819 3820 3821 3822 3823 3824 3825 3826 3827 3828 3829 3830 3831 3832 3833 3834 3835 3836 3837 3838 3839 3840 3841 3842 3843 3844 3845 3846 3847 3848 3849 3850 3851 3852 3853 3854 3855 3856 3857 3858 3859 3860 3861 3862 3863 3864 3865 3866 3867 3868 3869 3870 3871 3872 3873 3874 3875 3876 3877 3878 3879 3880 3881 3882 3883 3884 3885 3886 3887 3888 3889 3890 3891 3892 3893 3894 3895 3896 3897 3898 3899 3900 3901 3902 3903 3904 3905 3906 3907 3908 3909 3910 3911 3912 3913 3914 3915 3916 3917 3918 3919 3920 3921 3922 3923 3924 3925 3926 3927 3928 3929 3930 3931 3932 3933 3934 3935 3936 3937 3938 3939 3940 3941 3942 3943 3944 3945 3946 3947 3948 3949 3950 3951 3952 3953 3954 3955 3956 3957 3958 3959 3960 3961 3962 3963 3964 3965 3966 3967 3968 3969 3970 3971 3972 3973 3974 3975 3976 3977 3978 3979 3980 3981 3982 3983 3984 3985 3986 3987 3988 3989 3990 3991 3992 3993 3994 3995 3996 3997 3998 3999 4000 4001 4002 4003 4004 4005 4006 4007 4008 4009 4010 4011 4012 4013 4014 4015 4016 4017 4018 4019 4020 4021 4022 4023 4024 4025 4026 4027 4028 4029 4030 4031 4032 4033 4034 4035 4036 4037 4038 4039 4040 4041 4042 4043 4044 4045 4046 4047 4048 4049 4050 4051 4052 4053 4054 4055 4056 4057 4058 4059 4060 4061 4062 4063 4064 4065 4066 4067 4068 4069 4070 4071 4072 4073 4074 4075 4076 4077 4078 4079 4080 4081 4082 4083 4084 4085 4086 4087 4088 4089 4090 4091 4092 4093 4094 4095 4096 4097 4098 4099 4100 4101 4102 4103 4104 4105 4106 4107 4108 4109 4110 4111 4112 4113 4114 4115 4116 4117 4118 4119 4120 4121 4122 4123 4124 4125 4126 4127 4128 4129 4130 4131 4132 4133 4134 4135 4136 4137 4138 4139 4140 4141 4142 4143 4144 4145 4146 4147 4148 4149 4150 4151 4152 4153 4154 4155 4156 4157 4158 4159 4160 4161 4162 4163 4164 4165 4166 4167 4168 4169 4170 4171 4172 4173 4174 4175 4176 4177 4178 4179 4180 4181 4182 4183 4184 4185 4186 4187 4188 4189 4190 4191 4192 4193 4194 4195 4196 4197 4198 4199 4200 4201 4202 4203 4204 4205 4206 4207 4208 4209 4210 4211 4212 4213 4214 4215 4216 4217 4218 4219 4220 4221 4222 4223 4224 4225 4226 4227 4228 4229 4230 4231 4232 4233 4234 4235 4236 4237 4238 4239 4240 4241 4242 4243 4244 4245 4246 4247 4248 4249 4250 4251 4252 4253 4254 4255 4256 4257 4258 4259 4260 4261 4262 4263 4264 4265 4266 4267 4268 4269 4270 4271 4272 4273 4274 4275 4276 4277 4278 4279 4280 4281 4282 4283 4284 4285 4286 4287 4288 4289 4290 4291 4292 4293 4294 4295 4296 4297 4298 4299 4300 4301 4302 4303 4304 4305 4306 4307 4308 4309 4310 4311 4312 4313 4314 4315 4316 4317 4318 4319 4320 4321 4322 4323 4324 4325 4326 4327 4328 4329 4330 4331 4332 4333 4334 4335 4336 4337 4338 4339 4340 4341 4342 4343 4344 4345 4346 4347 4348 4349 4350 4351 4352 4353 4354 4355 4356 4357 4358 4359 4360 4361 4362 4363 4364 4365 4366 4367 4368 4369 4370 4371 4372 4373 4374 4375 4376 4377 4378 4379 4380 4381 4382 4383 4384 4385 4386 4387 4388 4389 4390 4391 4392 4393 4394 4395 4396 4397 4398 4399 4400 4401 4402 4403 4404 4405 4406 4407 4408 4409 4410 4411 4412 4413 4414 4415 4416 4417 4418 4419 4420 4421 4422 4423 4424 4425 4426 4427 4428 4429 4430 4431 4432 4433 4434 4435 4436 4437 4438 4439 4440 4441 4442 4443 4444 4445 4446 4447 4448 4449 4450 4451 4452 4453 4454 4455 4456 4457 4458 4459 4460 4461 4462 4463 4464 4465 4466 4467 4468 4469 4470 4471 4472 4473 4474 4475 4476 4477 4478 4479 4480 4481 4482 4483 4484 4485 4486 4487 4488 4489 4490 4491 4492 4493 4494 4495 4496 4497 4498 4499 4500 4501 4502 4503 4504 4505 4506 4507 4508 4509 4510 4511 4512 4513 4514 4515 4516 4517 4518 4519 4520 4521 4522 4523 4524 4525 4526 4527 4528 4529 4530 4531 4532 4533 4534 4535 4536 4537 4538 4539 4540 4541 4542 4543 4544 4545 4546 4547 4548 4549 4550 4551 4552 4553 4554 4555 4556 4557 4558 4559 4560 4561 4562 4563 4564 4565 4566 4567 4568 4569 4570 4571 4572 4573 4574 4575 4576 4577 4578 4579 4580 4581 4582 4583 4584 4585 4586 4587 4588 4589 4590 4591 4592 4593 4594 4595 4596 4597 4598 4599 4600 4601 4602 4603 4604 4605 4606 4607 4608 4609 4610 4611 4612 4613 4614 4615 4616 4617 4618 4619 4620 4621 4622 4623 4624 4625 4626 4627 4628 4629 4630 4631 4632 4633 4634 4635 4636 4637 4638 4639 4640 4641 4642 4643 4644 4645 4646 4647 4648 4649 4650 4651 4652 4653 4654 4655 4656 4657 4658 4659 4660 4661 4662 4663 4664 4665 4666 4667 4668 4669 4670 4671 4672 4673 4674 4675 4676 4677 4678 4679 4680 4681 4682 4683 4684 4685 4686 4687 4688 4689 4690 4691 4692 4693 4694 4695 4696 4697 4698 4699 4700 4701 4702 4703 4704 4705 4706 4707 4708 4709 4710 4711 4712 4713 4714 4715 4716 4717 4718 4719 4720 4721 4722 4723 4724 4725 4726 4727 4728 4729 4730 4731 4732 4733 4734 4735 4736 4737 4738 4739 4740 4741 4742 4743 4744 4745 4746 4747 4748 4749 4750 4751 4752 4753 4754 4755 4756 4757 4758 4759 4760 4761 4762 4763 4764 4765 4766 4767 4768 4769 4770 4771 4772 4773 4774 4775 4776 4777 4778 4779 4780 4781 4782 4783 4784 4785 4786 4787 4788 4789 4790 4791 4792 4793 4794 4795 4796 4797 4798 4799 4800 4801 4802 4803 4804 4805 4806 4807 4808 4809 4810 4811 4812 4813 4814 4815 4816 4817 4818 4819 4820 4821 4822 4823 4824 4825 4826 4827 4828 4829 4830 4831 4832 4833 4834 4835 4836 4837 4838 4839 4840 4841 4842 4843 4844 4845 4846 4847 4848 4849 4850 4851 4852 4853 4854 4855 4856 4857 4858 4859 4860 4861 4862 4863 4864 4865 4866 4867 4868 4869 4870 4871 4872 4873 4874 4875 4876 4877 4878 4879 4880 4881 4882 4883 4884 4885 4886 4887 4888 4889 4890 4891 4892 4893 4894 4895 4896 4897 4898 4899 4900 4901 4902 4903 4904 4905 4906 4907 4908 4909 4910 4911 4912 4913 4914 4915 4916 4917 4918 4919 4920 4921 4922 4923 4924 4925 4926 4927 4928 4929 4930 4931 4932 4933 4934 4935 4936 4937 4938 4939 4940 4941 4942 4943 4944 4945 4946 4947 4948 4949 4950 4951 4952 4953 4954 4955 4956 4957 4958 4959 4960 4961 4962 4963 4964 4965 4966 4967 4968 4969 4970 4971 4972 4973 4974 4975 4976 4977 4978 4979 4980 4981 4982 4983 4984 4985 4986 4987 4988 4989 4990 4991 4992 4993 4994 4995 4996 4997 4998 4999 5000 5001 5002 5003 5004 5005 5006 5007 5008 5009 5010 5011 5012 5013 5014 5015 5016 5017 5018 5019 5020 5021 5022 5023 5024 5025 5026 5027 5028 5029 5030 5031 5032 5033 5034 5035 5036 5037 5038 5039 5040 5041 5042 5043 5044 5045 5046 5047 5048 5049 5050 5051 5052 5053 5054 5055 5056 5057 5058 5059 5060 5061 5062 5063 5064 5065 5066 5067 5068 5069 5070 5071 5072 5073 5074 5075 5076 5077 5078 5079 5080 5081 5082 5083 5084 5085 5086 5087 5088 5089 5090 5091 5092 5093 5094 5095 5096 5097 5098 5099 5100 5101 5102 5103 5104 5105 5106 5107 5108 5109 5110 5111 5112 5113 5114 5115 5116 5117 5118 5119 5120 5121 5122 5123 5124 5125 5126 5127 5128 5129 5130 5131 5132 5133 5134 5135 5136 5137 5138 5139 5140 5141 5142 5143 5144 5145 5146 5147 5148 5149 5150 5151 5152 5153 5154 5155 5156 5157 5158 5159 5160 5161 5162 5163 5164 5165 5166 5167 5168 5169 5170 5171 5172 5173 5174 5175 5176 5177 5178 5179 5180 5181 5182 5183 5184 5185 5186 5187 5188 5189 5190 5191 5192 5193 5194 5195 5196 5197 5198 5199 5200 5201 5202 5203 5204 5205 5206 5207 5208 5209 5210 5211 5212 5213 5214 5215 5216 5217 5218 5219 5220 5221 5222 5223 5224 5225 5226 5227 5228 5229 5230 5231 5232 5233 5234 5235 5236 5237 5238 5239 5240 5241 5242 5243 5244 5245 5246 5247 5248 5249 5250 5251 5252 5253 5254 5255 5256 5257 5258 5259 5260 5261 5262 5263 5264 5265 5266 5267 5268 5269 5270 5271 5272 5273 5274 5275 5276 5277 5278 5279 5280 5281 5282 5283 5284 5285 5286 5287 5288 5289 5290 5291 5292 5293 5294 5295 5296 5297 5298 5299 5300 5301 5302 5303 5304 5305 5306 5307 5308 5309 5310 5311 5312 5313 5314 5315 5316 5317 5318 5319 5320 5321 5322 5323 5324 5325 5326 5327 5328 5329 5330 5331 5332 5333 5334 5335 5336 5337 5338 5339 5340 5341 5342 5343 5344 5345 5346 5347 5348 5349 5350 5351 5352 5353 5354 5355 5356 5357 5358 5359 5360 5361 5362 5363 5364 5365 5366 5367 5368 5369 5370 5371 5372 5373 5374 5375 5376 5377 5378 5379 5380 5381 5382 5383 5384 5385 5386 5387 5388 5389 5390 5391 5392 5393 5394 5395 5396 5397 5398 5399 5400 5401 5402 5403 5404 5405 5406 5407 5408 5409 5410 5411 5412 5413 5414 5415 5416 5417 5418 5419 5420 5421 5422 5423 5424 5425 5426 5427 5428 5429 5430 5431 5432 5433 5434 5435 5436 5437 5438 5439 5440 5441 5442 5443 5444 5445 5446 5447 5448 5449 5450 5451 5452 5453 5454 5455 5456 5457 5458 5459 5460 5461 5462 5463 5464 5465 5466 5467 5468 5469 5470 5471 5472 5473 5474 5475 5476 5477 5478 5479 5480 5481 5482 5483 5484 5485 5486 5487 5488 5489 5490 5491 5492 5493 5494 5495 5496 5497 5498 5499 5500 5501 5502 5503 5504 5505 5506 5507 5508 5509 5510 5511 5512 5513 5514 5515 5516 5517 5518 5519 5520 5521 5522 5523 5524 5525 5526 5527 5528 5529 5530 5531 5532 5533 5534 5535 5536 5537 5538 5539 5540 5541 5542 5543 5544 5545 5546 5547 5548 5549 5550 5551 5552 5553 5554 5555 5556 5557 5558 5559 5560 5561 5562 5563 5564 5565 5566 5567 5568 5569 5570 5571 5572 5573 5574 5575 5576 5577 5578 5579 5580 5581 5582 5583 5584 5585 5586 5587 5588 5589 5590 5591 5592 5593 5594 5595 5596 5597 5598 5599 5600 5601 5602 5603 5604 5605 5606 5607 5608 5609 5610 5611 5612 5613 5614 5615 5616 5617 5618 5619 5620 5621 5622 5623 5624 5625 5626 5627 5628 5629 5630 5631 5632 5633 5634 5635 5636 5637 5638 5639 5640 5641 5642 5643 5644 5645 5646 5647 5648 5649 5650 5651 5652 5653 5654 5655 5656 5657 5658 5659 5660 5661 5662 5663 5664 5665 5666 5667 5668 5669 5670 5671 5672 5673 5674 5675 5676 5677 5678 5679 5680 5681 5682 5683 5684 5685 5686 5687 5688 5689 5690 5691 5692 5693 5694 5695 5696 5697 5698 5699 5700 5701 5702 5703 5704 5705 5706 5707 5708 5709 5710 5711 5712 5713 5714 5715 5716 5717 5718 5719 5720 5721 5722 5723 5724 5725 5726 5727 5728 5729 5730 5731 5732 5733 5734 5735 5736 5737 5738 5739 5740 5741 5742 5743 5744 5745 5746 5747 5748 5749 5750 5751 5752 5753 5754 5755 5756 5757 5758 5759 5760 5761 5762 5763 5764 5765 5766 5767 5768 5769 5770 5771 5772 5773 5774 5775 5776 5777 5778 5779 5780 5781 5782 5783 5784 5785 5786 5787 5788 5789 5790 5791 5792 5793 5794 5795 5796 5797 5798 5799 5800 5801 5802 5803 5804 5805 5806 5807 5808 5809 5810 5811 5812 5813 5814 5815 5816 5817 5818 5819 5820 5821 5822 5823 5824 5825 5826 5827 5828 5829 5830 5831 5832 5833 5834 5835 5836 5837 5838 5839 5840 5841 5842 5843 5844 5845 5846 5847 5848 5849 5850 5851 5852 5853 5854 5855 5856 5857 5858 5859 5860 5861 5862 5863 5864 5865 5866 5867 5868 5869 5870 5871 5872 5873 5874 5875 5876 5877 5878 5879 5880 5881 5882 5883 5884 5885 5886 5887 5888 5889 5890 5891 5892 5893 5894 5895 5896 5897 5898 5899 5900 5901 5902 5903 5904 5905 5906 5907 5908 5909 5910 5911 5912 5913 5914 5915 5916 5917 5918 5919 5920 5921 5922 5923 5924 5925 5926 5927 5928 5929 5930 5931 5932 5933 5934 5935 5936 5937 5938 5939 5940 5941 5942 5943 5944 5945 5946 5947 5948 5949 5950 5951 5952 5953 5954 5955 5956 5957 5958 5959 5960 5961 5962 5963 5964 5965 5966 5967 5968 5969 5970 5971 5972 5973 5974 5975 5976 5977 5978 5979 5980 5981 5982 5983 5984 5985 5986 5987 5988 5989 5990 5991 5992 5993 5994 5995 5996 5997 5998 5999 6000 6001 6002 6003 6004 6005 6006 6007 6008 6009 6010 6011 6012 6013 6014 6015 6016 6017 6018 6019 6020 6021 6022 6023 6024 6025 6026 6027 6028 6029 6030 6031 6032 6033 6034 6035 6036 6037 6038 6039 6040 6041 6042 6043 6044 6045 6046 6047 6048 6049 6050 6051 6052 6053 6054 6055 6056 6057 6058 6059 6060 6061 6062 6063 6064 6065 6066 6067 6068 6069 6070 6071 6072 6073 6074 6075 6076 6077 6078 6079 6080 6081 6082 6083 6084 6085 6086 6087 6088 6089 6090 6091 6092 6093 6094 6095 6096 6097 6098 6099 6100 6101 6102 6103 6104 6105 6106 6107 6108 6109 6110 6111 6112 6113 6114 6115 6116 6117 6118 6119 6120 6121 6122 6123 6124 6125 6126 6127 6128 6129 6130 6131 6132 6133 6134 6135 6136 6137 6138 6139 6140 6141 6142 6143 6144 6145 6146 6147 6148 6149 6150 6151 6152 6153 6154 6155 6156 6157 6158 6159 6160 6161 6162 6163 6164 6165 6166 6167 6168 6169 6170 6171 6172 6173 6174 6175 6176 6177 6178 6179 6180 6181 6182 6183 6184 6185 6186 6187 6188 6189 6190 6191 6192 6193 6194 6195 6196 6197 6198 6199 6200 6201 6202 6203 6204 6205 6206 6207 6208 6209 6210 6211 6212 6213 6214 6215 6216 6217 6218 6219 6220 6221 6222 6223 6224 6225 6226 6227 6228 6229 6230 6231 6232 6233 6234 6235 6236 6237 6238 6239 6240 6241 6242 6243 6244 6245 6246 6247 6248 6249 6250 6251 6252 6253 6254 6255 6256 6257 6258 6259 6260 6261 6262 6263 6264 6265 6266 6267 6268 6269 6270 6271 6272 6273 6274 6275 6276 6277 6278 6279 6280 6281 6282 6283 6284 6285 6286 6287 6288 6289 6290 6291 6292 6293 6294 6295 6296 6297 6298 6299 6300 6301 6302 6303 6304 6305 6306 6307 6308 6309 6310 6311 6312 6313 6314 6315 6316 6317 6318 6319 6320 6321 6322 6323 6324 6325 6326 6327 6328 6329 6330 6331 6332 6333 6334 6335 6336 6337 6338 6339 6340 6341 6342 6343 6344 6345 6346 6347 6348 6349 6350 6351 6352 6353 6354 6355 6356 6357 6358 6359 6360 6361 6362 6363 6364 6365 6366 6367 6368 6369 6370 6371 6372 6373 6374 6375 6376 6377 6378 6379 6380 6381 6382 6383 6384 6385 6386 6387 6388 6389 6390 6391 6392 6393 6394 6395 6396 6397 6398 6399 6400 6401 6402 6403 6404 6405 6406 6407 6408 6409 6410 6411 6412 6413 6414 6415 6416 6417 6418 6419 6420 6421 6422 6423 6424 6425 6426 6427 6428 6429 6430 6431 6432 6433 6434 6435 6436 6437 6438 6439 6440 6441 6442 6443 6444 6445 6446 6447 6448 6449 6450 6451 6452 6453 6454 6455 6456 6457 6458 6459 6460 6461 6462 6463 6464 6465 6466 6467 6468 6469 6470 6471 6472 6473 6474 6475 6476 6477 6478 6479 6480 6481 6482 6483 6484 6485 6486 6487 6488 6489 6490 6491 6492 6493 6494 6495 6496 6497 6498 6499 6500 6501 6502 6503 6504 6505 6506 6507 6508 6509 6510 6511 6512 6513 6514 6515 6516 6517 6518 6519 6520 6521 6522 6523 6524 6525 6526 6527 6528 6529 6530 6531 6532 6533 6534 6535 6536 6537 6538 6539 6540 6541 6542 6543 6544 6545 6546 6547 6548 6549 6550 6551 6552 6553 6554 6555 6556 6557 6558 6559 6560 6561 6562 6563 6564 6565 6566 6567 6568 6569 6570 6571 6572 6573 6574 6575 6576 6577 6578 6579 6580 6581 6582 6583 6584 6585 6586 6587 6588 6589 6590 6591 6592 6593 6594 6595 6596 6597 6598 6599 6600 6601 6602 6603 6604 6605 6606 6607 6608 6609 6610 6611 6612 6613 6614 6615 6616 6617 6618 6619 6620 6621 6622 6623 6624 6625 6626 6627 6628 6629 6630 6631 6632 6633 6634 6635 6636 6637 6638 6639 6640 6641 6642 6643 6644 6645 6646 6647 6648 6649 6650 6651 6652 6653 6654 6655 6656 6657 6658 6659 6660 6661 6662 6663 6664 6665 6666 6667 6668 6669 6670 6671 6672 6673 6674 6675 6676 6677 6678 6679 6680 6681 6682 6683 6684 6685 6686 6687 6688 6689 6690 6691 6692 6693 6694 6695 6696 6697 6698 6699 6700 6701 6702 6703 6704 6705 6706 6707 6708 6709 6710 6711 6712 6713 6714 6715 6716 6717 6718 6719 6720 6721 6722 6723 6724 6725 6726 6727 6728 6729 6730 6731 6732 6733 6734 6735 6736 6737 6738 6739 6740 6741 6742 6743 6744 6745 6746 6747 6748 6749 6750 6751 6752 6753 6754 6755 6756 6757 6758 6759 6760 6761 6762 6763 6764 6765 6766 6767 6768 6769 6770 6771 6772 6773 6774 6775 6776 6777 6778 6779 6780 6781 6782 6783 6784 6785 6786 6787 6788 6789 6790 6791 6792 6793 6794 6795 6796 6797 6798 6799 6800 6801 6802 6803 6804 6805 6806 6807 6808 6809 6810 6811 6812 6813 6814 6815 6816 6817 6818 6819 6820 6821 6822 6823 6824 6825 6826 6827 6828 6829 6830 6831 6832 6833 6834 6835 6836 6837 6838 6839 6840 6841 6842 6843 6844 6845 6846 6847 6848 6849 6850 6851 6852 6853 6854 6855 6856 6857 6858 6859 6860 6861 6862 6863 6864 6865 6866 6867 6868 6869 6870 6871 6872 6873 6874 6875 6876 6877 6878 6879 6880 6881 6882 6883 6884 6885 6886 6887 6888 6889 6890 6891 6892 6893 6894 6895 6896 6897 6898 6899 6900 6901 6902 6903 6904 6905 6906 6907 6908 6909 6910 6911 6912 6913 6914 6915 6916 6917 6918 6919 6920 6921 6922 6923 6924 6925 6926 6927 6928 6929 6930 6931 6932 6933 6934 6935 6936 6937 6938 6939 6940 6941 6942 6943 6944 6945 6946 6947 6948 6949 6950 6951 6952 6953 6954 6955 6956 6957 6958 6959 6960 6961 6962 6963 6964 6965 6966 6967 6968 6969 6970 6971 6972 6973 6974 6975 6976 6977 6978 6979 6980 6981 6982 6983 6984 6985 6986 6987 6988 6989 6990 6991 6992 6993 6994 6995 6996 6997 6998 6999 7000 7001 7002 7003 7004 7005 7006 7007 7008 7009 7010 7011 7012 7013 7014 7015 7016 7017 7018 7019 7020 7021 7022 7023 7024 7025 7026 7027 7028 7029 7030 7031 7032 7033 7034 7035 7036 7037 7038 7039 7040 7041 7042 7043 7044 7045 7046 7047 7048 7049 7050 7051 7052 7053 7054 7055 7056 7057 7058 7059 7060 7061 7062 7063 7064 7065 7066 7067 7068 7069 7070 7071 7072 7073 7074 7075 7076 7077 7078 7079 7080 7081 7082 7083 7084 7085 7086 7087 7088 7089 7090 7091 7092 7093 7094 7095 7096 7097 7098 7099 7100 7101 7102 7103 7104 7105 7106 7107 7108 7109 7110 7111 7112 7113 7114 7115 7116 7117 7118 7119 7120 7121 7122 7123 7124 7125 7126 7127 7128 7129 7130 7131 7132 7133 7134 7135 7136 7137 7138 7139 7140 7141 7142 7143 7144 7145 7146 7147 7148 7149 7150 7151 7152 7153 7154 7155 7156 7157 7158 7159 7160 7161 7162 7163 7164 7165 7166 7167 7168 7169 7170 7171 7172 7173 7174 7175 7176 7177 7178 7179 7180 7181 7182 7183 7184 7185 7186 7187 7188 7189 7190 7191 7192 7193 7194 7195 7196 7197 7198 7199 7200 7201 7202 7203 7204 7205 7206 7207 7208 7209 7210 7211 7212 7213 7214 7215 7216 7217 7218 7219 7220 7221 7222 7223 7224 7225 7226 7227 7228 7229 7230 7231 7232 7233 7234 7235 7236 7237 7238 7239 7240 7241 7242 7243 7244 7245 7246 7247 7248 7249 7250 7251 7252 7253 7254 7255 7256 7257 7258 7259 7260 7261 7262 7263 7264 7265 7266 7267 7268 7269 7270 7271 7272 7273 7274 7275 7276 7277 7278 7279 7280 7281 7282 7283 7284 7285 7286 7287 7288 7289 7290 7291 7292 7293 7294 7295 7296 7297 7298 7299 7300 7301 7302 7303 7304 7305 7306 7307 7308 7309 7310 7311 7312 7313 7314 7315 7316 7317 7318 7319 7320 7321 7322 7323 7324 7325 7326 7327 7328 7329 7330 7331 7332 7333 7334 7335 7336 7337 7338 7339 7340 7341 7342 7343 7344 7345 7346 7347 7348 7349 7350 7351 7352 7353 7354 7355 7356 7357 7358 7359 7360 7361 7362 7363 7364 7365 7366 7367 7368 7369 7370 7371 7372 7373 7374 7375 7376 7377 7378 7379 7380 7381 7382 7383 7384 7385 7386 7387 7388 7389 7390 7391 7392 7393 7394 7395 7396 7397 7398 7399 7400 7401 7402 7403 7404 7405 7406 7407 7408 7409 7410 7411 7412 7413 7414 7415 7416 7417 7418 7419 7420 7421 7422 7423 7424 7425 7426 7427 7428 7429 7430 7431 7432 7433 7434 7435 7436 7437 7438 7439 7440 7441 7442 7443 7444 7445 7446 7447 7448 7449 7450 7451 7452 7453 7454 7455 7456 7457 7458 7459 7460 7461 7462 7463 7464 7465 7466 7467 7468 7469 7470 7471 7472 7473 7474 7475 7476 7477 7478 7479 7480 7481 7482 7483 7484 7485 7486 7487 7488 7489 7490 7491 7492 7493 7494 7495 7496 7497 7498 7499 7500 7501 7502 7503 7504 7505 7506 7507 7508 7509 7510 7511 7512 7513 7514 7515 7516 7517 7518 7519 7520 7521 7522 7523 7524 7525 7526 7527 7528 7529 7530 7531 7532 7533 7534 7535 7536 7537 7538 7539 7540 7541 7542 7543 7544 7545 7546 7547 7548 7549 7550 7551 7552 7553 7554 7555 7556 7557 7558 7559 7560 7561 7562 7563 7564 7565 7566 7567 7568 7569 7570 7571 7572 7573 7574 7575 7576 7577 7578 7579 7580 7581 7582 7583 7584 7585 7586 7587 7588 7589 7590 7591 7592 7593 7594 7595 7596 7597 7598 7599 7600 7601 7602 7603 7604 7605 7606 7607 7608 7609 7610 7611 7612 7613 7614 7615 7616 7617 7618 7619 7620 7621 7622 7623 7624 7625 7626 7627 7628 7629 7630 7631 7632 7633 7634 7635 7636 7637 7638 7639 7640 7641 7642 7643 7644 7645 7646 7647 7648 7649 7650 7651 7652 7653 7654 7655 7656 7657 7658 7659 7660 7661 7662 7663 7664 7665 7666 7667 7668 7669 7670 7671 7672 7673 7674 7675 7676 7677 7678 7679 7680 7681 7682 7683 7684 7685 7686 7687 7688 7689 7690 7691 7692 7693 7694 7695 7696 7697 7698 7699 7700 7701 7702 7703 7704 7705 7706 7707 7708 7709 7710 7711 7712 7713 7714 7715 7716 7717 7718 7719 7720 7721 7722 7723 7724 7725 7726 7727 7728 7729 7730 7731 7732 7733 7734 7735 7736 7737 7738 7739 7740 7741 7742 7743 7744 7745 7746 7747 7748 7749 7750 7751 7752 7753 7754 7755 7756 7757 7758 7759 7760 7761 7762 7763 7764 7765 7766 7767 7768 7769 7770 7771 7772 7773 7774 7775 7776 7777 7778 7779 7780 7781 7782 7783 7784 7785 7786 7787 7788 7789 7790 7791 7792 7793 7794 7795 7796 7797 7798 7799 7800 7801 7802 7803 7804 7805 7806 7807 7808 7809 7810 7811 7812 7813 7814 7815 7816 7817 7818 7819 7820 7821 7822 7823 7824 7825 7826 7827 7828 7829 7830 7831 7832 7833 7834 7835 7836 7837 7838 7839 7840 7841 7842 7843 7844 7845 7846 7847 7848 7849 7850 7851 7852 7853 7854 7855 7856 7857 7858 7859 7860 7861 7862 7863 7864 7865 7866 7867 7868 7869 7870 7871 7872 7873 7874 7875 7876 7877 7878 7879 7880 7881 7882 7883 7884 7885 7886 7887 7888 7889 7890 7891 7892 7893 7894 7895 7896 7897 7898 7899 7900 7901 7902 7903 7904 7905 7906 7907 7908 7909 7910 7911 7912 7913 7914 7915 7916 7917 7918 7919 7920 7921 7922 7923 7924 7925 7926 7927 7928 7929 7930 7931 7932 7933 7934 7935 7936 7937 7938 7939 7940 7941 7942 7943 7944 7945 7946 7947 7948 7949 7950 7951 7952 7953 7954 7955 7956 7957 7958 7959 7960 7961 7962 7963 7964 7965 7966 7967 7968 7969 7970 7971 7972 7973 7974 7975 7976 7977 7978 7979 7980 7981 7982 7983 7984 7985 7986 7987 7988 7989 7990 7991 7992 7993 7994 7995 7996 7997 7998 7999 8000 8001 8002 8003 8004 8005 8006 8007 8008 8009 8010 8011 8012 8013 8014 8015 8016 8017 8018 8019 8020 8021 8022 8023 8024 8025 8026 8027 8028 /* SPDX-License-Identifier: GPL-2.0-only */ #ifndef __NET_CFG80211_H #define __NET_CFG80211_H /* * 802.11 device and configuration interface * * Copyright 2006-2010 Johannes Berg <johannes@sipsolutions.net> * Copyright 2013-2014 Intel Mobile Communications GmbH * Copyright 2015-2017 Intel Deutschland GmbH * Copyright (C) 2018-2020 Intel Corporation */ #include <linux/netdevice.h> #include <linux/debugfs.h> #include <linux/list.h> #include <linux/bug.h> #include <linux/netlink.h> #include <linux/skbuff.h> #include <linux/nl80211.h> #include <linux/if_ether.h> #include <linux/ieee80211.h> #include <linux/net.h> #include <net/regulatory.h> /** * DOC: Introduction * * cfg80211 is the configuration API for 802.11 devices in Linux. It bridges * userspace and drivers, and offers some utility functionality associated * with 802.11. cfg80211 must, directly or indirectly via mac80211, be used * by all modern wireless drivers in Linux, so that they offer a consistent * API through nl80211. For backward compatibility, cfg80211 also offers * wireless extensions to userspace, but hides them from drivers completely. * * Additionally, cfg80211 contains code to help enforce regulatory spectrum * use restrictions. */ /** * DOC: Device registration * * In order for a driver to use cfg80211, it must register the hardware device * with cfg80211. This happens through a number of hardware capability structs * described below. * * The fundamental structure for each device is the 'wiphy', of which each * instance describes a physical wireless device connected to the system. Each * such wiphy can have zero, one, or many virtual interfaces associated with * it, which need to be identified as such by pointing the network interface's * @ieee80211_ptr pointer to a &struct wireless_dev which further describes * the wireless part of the interface, normally this struct is embedded in the * network interface's private data area. Drivers can optionally allow creating * or destroying virtual interfaces on the fly, but without at least one or the * ability to create some the wireless device isn't useful. * * Each wiphy structure contains device capability information, and also has * a pointer to the various operations the driver offers. The definitions and * structures here describe these capabilities in detail. */ struct wiphy; /* * wireless hardware capability structures */ /** * enum ieee80211_channel_flags - channel flags * * Channel flags set by the regulatory control code. * * @IEEE80211_CHAN_DISABLED: This channel is disabled. * @IEEE80211_CHAN_NO_IR: do not initiate radiation, this includes * sending probe requests or beaconing. * @IEEE80211_CHAN_RADAR: Radar detection is required on this channel. * @IEEE80211_CHAN_NO_HT40PLUS: extension channel above this channel * is not permitted. * @IEEE80211_CHAN_NO_HT40MINUS: extension channel below this channel * is not permitted. * @IEEE80211_CHAN_NO_OFDM: OFDM is not allowed on this channel. * @IEEE80211_CHAN_NO_80MHZ: If the driver supports 80 MHz on the band, * this flag indicates that an 80 MHz channel cannot use this * channel as the control or any of the secondary channels. * This may be due to the driver or due to regulatory bandwidth * restrictions. * @IEEE80211_CHAN_NO_160MHZ: If the driver supports 160 MHz on the band, * this flag indicates that an 160 MHz channel cannot use this * channel as the control or any of the secondary channels. * This may be due to the driver or due to regulatory bandwidth * restrictions. * @IEEE80211_CHAN_INDOOR_ONLY: see %NL80211_FREQUENCY_ATTR_INDOOR_ONLY * @IEEE80211_CHAN_IR_CONCURRENT: see %NL80211_FREQUENCY_ATTR_IR_CONCURRENT * @IEEE80211_CHAN_NO_20MHZ: 20 MHz bandwidth is not permitted * on this channel. * @IEEE80211_CHAN_NO_10MHZ: 10 MHz bandwidth is not permitted * on this channel. * @IEEE80211_CHAN_NO_HE: HE operation is not permitted on this channel. * @IEEE80211_CHAN_1MHZ: 1 MHz bandwidth is permitted * on this channel. * @IEEE80211_CHAN_2MHZ: 2 MHz bandwidth is permitted * on this channel. * @IEEE80211_CHAN_4MHZ: 4 MHz bandwidth is permitted * on this channel. * @IEEE80211_CHAN_8MHZ: 8 MHz bandwidth is permitted * on this channel. * @IEEE80211_CHAN_16MHZ: 16 MHz bandwidth is permitted * on this channel. * */ enum ieee80211_channel_flags { IEEE80211_CHAN_DISABLED = 1<<0, IEEE80211_CHAN_NO_IR = 1<<1, /* hole at 1<<2 */ IEEE80211_CHAN_RADAR = 1<<3, IEEE80211_CHAN_NO_HT40PLUS = 1<<4, IEEE80211_CHAN_NO_HT40MINUS = 1<<5, IEEE80211_CHAN_NO_OFDM = 1<<6, IEEE80211_CHAN_NO_80MHZ = 1<<7, IEEE80211_CHAN_NO_160MHZ = 1<<8, IEEE80211_CHAN_INDOOR_ONLY = 1<<9, IEEE80211_CHAN_IR_CONCURRENT = 1<<10, IEEE80211_CHAN_NO_20MHZ = 1<<11, IEEE80211_CHAN_NO_10MHZ = 1<<12, IEEE80211_CHAN_NO_HE = 1<<13, IEEE80211_CHAN_1MHZ = 1<<14, IEEE80211_CHAN_2MHZ = 1<<15, IEEE80211_CHAN_4MHZ = 1<<16, IEEE80211_CHAN_8MHZ = 1<<17, IEEE80211_CHAN_16MHZ = 1<<18, }; #define IEEE80211_CHAN_NO_HT40 \ (IEEE80211_CHAN_NO_HT40PLUS | IEEE80211_CHAN_NO_HT40MINUS) #define IEEE80211_DFS_MIN_CAC_TIME_MS 60000 #define IEEE80211_DFS_MIN_NOP_TIME_MS (30 * 60 * 1000) /** * struct ieee80211_channel - channel definition * * This structure describes a single channel for use * with cfg80211. * * @center_freq: center frequency in MHz * @freq_offset: offset from @center_freq, in KHz * @hw_value: hardware-specific value for the channel * @flags: channel flags from &enum ieee80211_channel_flags. * @orig_flags: channel flags at registration time, used by regulatory * code to support devices with additional restrictions * @band: band this channel belongs to. * @max_antenna_gain: maximum antenna gain in dBi * @max_power: maximum transmission power (in dBm) * @max_reg_power: maximum regulatory transmission power (in dBm) * @beacon_found: helper to regulatory code to indicate when a beacon * has been found on this channel. Use regulatory_hint_found_beacon() * to enable this, this is useful only on 5 GHz band. * @orig_mag: internal use * @orig_mpwr: internal use * @dfs_state: current state of this channel. Only relevant if radar is required * on this channel. * @dfs_state_entered: timestamp (jiffies) when the dfs state was entered. * @dfs_cac_ms: DFS CAC time in milliseconds, this is valid for DFS channels. */ struct ieee80211_channel { enum nl80211_band band; u32 center_freq; u16 freq_offset; u16 hw_value; u32 flags; int max_antenna_gain; int max_power; int max_reg_power; bool beacon_found; u32 orig_flags; int orig_mag, orig_mpwr; enum nl80211_dfs_state dfs_state; unsigned long dfs_state_entered; unsigned int dfs_cac_ms; }; /** * enum ieee80211_rate_flags - rate flags * * Hardware/specification flags for rates. These are structured * in a way that allows using the same bitrate structure for * different bands/PHY modes. * * @IEEE80211_RATE_SHORT_PREAMBLE: Hardware can send with short * preamble on this bitrate; only relevant in 2.4GHz band and * with CCK rates. * @IEEE80211_RATE_MANDATORY_A: This bitrate is a mandatory rate * when used with 802.11a (on the 5 GHz band); filled by the * core code when registering the wiphy. * @IEEE80211_RATE_MANDATORY_B: This bitrate is a mandatory rate * when used with 802.11b (on the 2.4 GHz band); filled by the * core code when registering the wiphy. * @IEEE80211_RATE_MANDATORY_G: This bitrate is a mandatory rate * when used with 802.11g (on the 2.4 GHz band); filled by the * core code when registering the wiphy. * @IEEE80211_RATE_ERP_G: This is an ERP rate in 802.11g mode. * @IEEE80211_RATE_SUPPORTS_5MHZ: Rate can be used in 5 MHz mode * @IEEE80211_RATE_SUPPORTS_10MHZ: Rate can be used in 10 MHz mode */ enum ieee80211_rate_flags { IEEE80211_RATE_SHORT_PREAMBLE = 1<<0, IEEE80211_RATE_MANDATORY_A = 1<<1, IEEE80211_RATE_MANDATORY_B = 1<<2, IEEE80211_RATE_MANDATORY_G = 1<<3, IEEE80211_RATE_ERP_G = 1<<4, IEEE80211_RATE_SUPPORTS_5MHZ = 1<<5, IEEE80211_RATE_SUPPORTS_10MHZ = 1<<6, }; /** * enum ieee80211_bss_type - BSS type filter * * @IEEE80211_BSS_TYPE_ESS: Infrastructure BSS * @IEEE80211_BSS_TYPE_PBSS: Personal BSS * @IEEE80211_BSS_TYPE_IBSS: Independent BSS * @IEEE80211_BSS_TYPE_MBSS: Mesh BSS * @IEEE80211_BSS_TYPE_ANY: Wildcard value for matching any BSS type */ enum ieee80211_bss_type { IEEE80211_BSS_TYPE_ESS, IEEE80211_BSS_TYPE_PBSS, IEEE80211_BSS_TYPE_IBSS, IEEE80211_BSS_TYPE_MBSS, IEEE80211_BSS_TYPE_ANY }; /** * enum ieee80211_privacy - BSS privacy filter * * @IEEE80211_PRIVACY_ON: privacy bit set * @IEEE80211_PRIVACY_OFF: privacy bit clear * @IEEE80211_PRIVACY_ANY: Wildcard value for matching any privacy setting */ enum ieee80211_privacy { IEEE80211_PRIVACY_ON, IEEE80211_PRIVACY_OFF, IEEE80211_PRIVACY_ANY }; #define IEEE80211_PRIVACY(x) \ ((x) ? IEEE80211_PRIVACY_ON : IEEE80211_PRIVACY_OFF) /** * struct ieee80211_rate - bitrate definition * * This structure describes a bitrate that an 802.11 PHY can * operate with. The two values @hw_value and @hw_value_short * are only for driver use when pointers to this structure are * passed around. * * @flags: rate-specific flags * @bitrate: bitrate in units of 100 Kbps * @hw_value: driver/hardware value for this rate * @hw_value_short: driver/hardware value for this rate when * short preamble is used */ struct ieee80211_rate { u32 flags; u16 bitrate; u16 hw_value, hw_value_short; }; /** * struct ieee80211_he_obss_pd - AP settings for spatial reuse * * @enable: is the feature enabled. * @sr_ctrl: The SR Control field of SRP element. * @non_srg_max_offset: non-SRG maximum tx power offset * @min_offset: minimal tx power offset an associated station shall use * @max_offset: maximum tx power offset an associated station shall use * @bss_color_bitmap: bitmap that indicates the BSS color values used by * members of the SRG * @partial_bssid_bitmap: bitmap that indicates the partial BSSID values * used by members of the SRG */ struct ieee80211_he_obss_pd { bool enable; u8 sr_ctrl; u8 non_srg_max_offset; u8 min_offset; u8 max_offset; u8 bss_color_bitmap[8]; u8 partial_bssid_bitmap[8]; }; /** * struct cfg80211_he_bss_color - AP settings for BSS coloring * * @color: the current color. * @enabled: HE BSS color is used * @partial: define the AID equation. */ struct cfg80211_he_bss_color { u8 color; bool enabled; bool partial; }; /** * struct ieee80211_he_bss_color - AP settings for BSS coloring * * @color: the current color. * @disabled: is the feature disabled. * @partial: define the AID equation. */ struct ieee80211_he_bss_color { u8 color; bool disabled; bool partial; }; /** * struct ieee80211_sta_ht_cap - STA's HT capabilities * * This structure describes most essential parameters needed * to describe 802.11n HT capabilities for an STA. * * @ht_supported: is HT supported by the STA * @cap: HT capabilities map as described in 802.11n spec * @ampdu_factor: Maximum A-MPDU length factor * @ampdu_density: Minimum A-MPDU spacing * @mcs: Supported MCS rates */ struct ieee80211_sta_ht_cap { u16 cap; /* use IEEE80211_HT_CAP_ */ bool ht_supported; u8 ampdu_factor; u8 ampdu_density; struct ieee80211_mcs_info mcs; }; /** * struct ieee80211_sta_vht_cap - STA's VHT capabilities * * This structure describes most essential parameters needed * to describe 802.11ac VHT capabilities for an STA. * * @vht_supported: is VHT supported by the STA * @cap: VHT capabilities map as described in 802.11ac spec * @vht_mcs: Supported VHT MCS rates */ struct ieee80211_sta_vht_cap { bool vht_supported; u32 cap; /* use IEEE80211_VHT_CAP_ */ struct ieee80211_vht_mcs_info vht_mcs; }; #define IEEE80211_HE_PPE_THRES_MAX_LEN 25 /** * struct ieee80211_sta_he_cap - STA's HE capabilities * * This structure describes most essential parameters needed * to describe 802.11ax HE capabilities for a STA. * * @has_he: true iff HE data is valid. * @he_cap_elem: Fixed portion of the HE capabilities element. * @he_mcs_nss_supp: The supported NSS/MCS combinations. * @ppe_thres: Holds the PPE Thresholds data. */ struct ieee80211_sta_he_cap { bool has_he; struct ieee80211_he_cap_elem he_cap_elem; struct ieee80211_he_mcs_nss_supp he_mcs_nss_supp; u8 ppe_thres[IEEE80211_HE_PPE_THRES_MAX_LEN]; }; /** * struct ieee80211_sband_iftype_data * * This structure encapsulates sband data that is relevant for the * interface types defined in @types_mask. Each type in the * @types_mask must be unique across all instances of iftype_data. * * @types_mask: interface types mask * @he_cap: holds the HE capabilities * @he_6ghz_capa: HE 6 GHz capabilities, must be filled in for a * 6 GHz band channel (and 0 may be valid value). */ struct ieee80211_sband_iftype_data { u16 types_mask; struct ieee80211_sta_he_cap he_cap; struct ieee80211_he_6ghz_capa he_6ghz_capa; }; /** * enum ieee80211_edmg_bw_config - allowed channel bandwidth configurations * * @IEEE80211_EDMG_BW_CONFIG_4: 2.16GHz * @IEEE80211_EDMG_BW_CONFIG_5: 2.16GHz and 4.32GHz * @IEEE80211_EDMG_BW_CONFIG_6: 2.16GHz, 4.32GHz and 6.48GHz * @IEEE80211_EDMG_BW_CONFIG_7: 2.16GHz, 4.32GHz, 6.48GHz and 8.64GHz * @IEEE80211_EDMG_BW_CONFIG_8: 2.16GHz and 2.16GHz + 2.16GHz * @IEEE80211_EDMG_BW_CONFIG_9: 2.16GHz, 4.32GHz and 2.16GHz + 2.16GHz * @IEEE80211_EDMG_BW_CONFIG_10: 2.16GHz, 4.32GHz, 6.48GHz and 2.16GHz+2.16GHz * @IEEE80211_EDMG_BW_CONFIG_11: 2.16GHz, 4.32GHz, 6.48GHz, 8.64GHz and * 2.16GHz+2.16GHz * @IEEE80211_EDMG_BW_CONFIG_12: 2.16GHz, 2.16GHz + 2.16GHz and * 4.32GHz + 4.32GHz * @IEEE80211_EDMG_BW_CONFIG_13: 2.16GHz, 4.32GHz, 2.16GHz + 2.16GHz and * 4.32GHz + 4.32GHz * @IEEE80211_EDMG_BW_CONFIG_14: 2.16GHz, 4.32GHz, 6.48GHz, 2.16GHz + 2.16GHz * and 4.32GHz + 4.32GHz * @IEEE80211_EDMG_BW_CONFIG_15: 2.16GHz, 4.32GHz, 6.48GHz, 8.64GHz, * 2.16GHz + 2.16GHz and 4.32GHz + 4.32GHz */ enum ieee80211_edmg_bw_config { IEEE80211_EDMG_BW_CONFIG_4 = 4, IEEE80211_EDMG_BW_CONFIG_5 = 5, IEEE80211_EDMG_BW_CONFIG_6 = 6, IEEE80211_EDMG_BW_CONFIG_7 = 7, IEEE80211_EDMG_BW_CONFIG_8 = 8, IEEE80211_EDMG_BW_CONFIG_9 = 9, IEEE80211_EDMG_BW_CONFIG_10 = 10, IEEE80211_EDMG_BW_CONFIG_11 = 11, IEEE80211_EDMG_BW_CONFIG_12 = 12, IEEE80211_EDMG_BW_CONFIG_13 = 13, IEEE80211_EDMG_BW_CONFIG_14 = 14, IEEE80211_EDMG_BW_CONFIG_15 = 15, }; /** * struct ieee80211_edmg - EDMG configuration * * This structure describes most essential parameters needed * to describe 802.11ay EDMG configuration * * @channels: bitmap that indicates the 2.16 GHz channel(s) * that are allowed to be used for transmissions. * Bit 0 indicates channel 1, bit 1 indicates channel 2, etc. * Set to 0 indicate EDMG not supported. * @bw_config: Channel BW Configuration subfield encodes * the allowed channel bandwidth configurations */ struct ieee80211_edmg { u8 channels; enum ieee80211_edmg_bw_config bw_config; }; /** * struct ieee80211_sta_s1g_cap - STA's S1G capabilities * * This structure describes most essential parameters needed * to describe 802.11ah S1G capabilities for a STA. * * @s1g_supported: is STA an S1G STA * @cap: S1G capabilities information * @nss_mcs: Supported NSS MCS set */ struct ieee80211_sta_s1g_cap { bool s1g; u8 cap[10]; /* use S1G_CAPAB_ */ u8 nss_mcs[5]; }; /** * struct ieee80211_supported_band - frequency band definition * * This structure describes a frequency band a wiphy * is able to operate in. * * @channels: Array of channels the hardware can operate with * in this band. * @band: the band this structure represents * @n_channels: Number of channels in @channels * @bitrates: Array of bitrates the hardware can operate with * in this band. Must be sorted to give a valid "supported * rates" IE, i.e. CCK rates first, then OFDM. * @n_bitrates: Number of bitrates in @bitrates * @ht_cap: HT capabilities in this band * @vht_cap: VHT capabilities in this band * @s1g_cap: S1G capabilities in this band * @edmg_cap: EDMG capabilities in this band * @s1g_cap: S1G capabilities in this band (S1B band only, of course) * @n_iftype_data: number of iftype data entries * @iftype_data: interface type data entries. Note that the bits in * @types_mask inside this structure cannot overlap (i.e. only * one occurrence of each type is allowed across all instances of * iftype_data). */ struct ieee80211_supported_band { struct ieee80211_channel *channels; struct ieee80211_rate *bitrates; enum nl80211_band band; int n_channels; int n_bitrates; struct ieee80211_sta_ht_cap ht_cap; struct ieee80211_sta_vht_cap vht_cap; struct ieee80211_sta_s1g_cap s1g_cap; struct ieee80211_edmg edmg_cap; u16 n_iftype_data; const struct ieee80211_sband_iftype_data *iftype_data; }; /** * ieee80211_get_sband_iftype_data - return sband data for a given iftype * @sband: the sband to search for the STA on * @iftype: enum nl80211_iftype * * Return: pointer to struct ieee80211_sband_iftype_data, or NULL is none found */ static inline const struct ieee80211_sband_iftype_data * ieee80211_get_sband_iftype_data(const struct ieee80211_supported_band *sband, u8 iftype) { int i; if (WARN_ON(iftype >= NL80211_IFTYPE_MAX)) return NULL; for (i = 0; i < sband->n_iftype_data; i++) { const struct ieee80211_sband_iftype_data *data = &sband->iftype_data[i]; if (data->types_mask & BIT(iftype)) return data; } return NULL; } /** * ieee80211_get_he_iftype_cap - return HE capabilities for an sband's iftype * @sband: the sband to search for the iftype on * @iftype: enum nl80211_iftype * * Return: pointer to the struct ieee80211_sta_he_cap, or NULL is none found */ static inline const struct ieee80211_sta_he_cap * ieee80211_get_he_iftype_cap(const struct ieee80211_supported_band *sband, u8 iftype) { const struct ieee80211_sband_iftype_data *data = ieee80211_get_sband_iftype_data(sband, iftype); if (data && data->he_cap.has_he) return &data->he_cap; return NULL; } /** * ieee80211_get_he_sta_cap - return HE capabilities for an sband's STA * @sband: the sband to search for the STA on * * Return: pointer to the struct ieee80211_sta_he_cap, or NULL is none found */ static inline const struct ieee80211_sta_he_cap * ieee80211_get_he_sta_cap(const struct ieee80211_supported_band *sband) { return ieee80211_get_he_iftype_cap(sband, NL80211_IFTYPE_STATION); } /** * ieee80211_get_he_6ghz_capa - return HE 6 GHz capabilities * @sband: the sband to search for the STA on * @iftype: the iftype to search for * * Return: the 6GHz capabilities */ static inline __le16 ieee80211_get_he_6ghz_capa(const struct ieee80211_supported_band *sband, enum nl80211_iftype iftype) { const struct ieee80211_sband_iftype_data *data = ieee80211_get_sband_iftype_data(sband, iftype); if (WARN_ON(!data || !data->he_cap.has_he)) return 0; return data->he_6ghz_capa.capa; } /** * wiphy_read_of_freq_limits - read frequency limits from device tree * * @wiphy: the wireless device to get extra limits for * * Some devices may have extra limitations specified in DT. This may be useful * for chipsets that normally support more bands but are limited due to board * design (e.g. by antennas or external power amplifier). * * This function reads info from DT and uses it to *modify* channels (disable * unavailable ones). It's usually a *bad* idea to use it in drivers with * shared channel data as DT limitations are device specific. You should make * sure to call it only if channels in wiphy are copied and can be modified * without affecting other devices. * * As this function access device node it has to be called after set_wiphy_dev. * It also modifies channels so they have to be set first. * If using this helper, call it before wiphy_register(). */ #ifdef CONFIG_OF void wiphy_read_of_freq_limits(struct wiphy *wiphy); #else /* CONFIG_OF */ static inline void wiphy_read_of_freq_limits(struct wiphy *wiphy) { } #endif /* !CONFIG_OF */ /* * Wireless hardware/device configuration structures and methods */ /** * DOC: Actions and configuration * * Each wireless device and each virtual interface offer a set of configuration * operations and other actions that are invoked by userspace. Each of these * actions is described in the operations structure, and the parameters these * operations use are described separately. * * Additionally, some operations are asynchronous and expect to get status * information via some functions that drivers need to call. * * Scanning and BSS list handling with its associated functionality is described * in a separate chapter. */ #define VHT_MUMIMO_GROUPS_DATA_LEN (WLAN_MEMBERSHIP_LEN +\ WLAN_USER_POSITION_LEN) /** * struct vif_params - describes virtual interface parameters * @flags: monitor interface flags, unchanged if 0, otherwise * %MONITOR_FLAG_CHANGED will be set * @use_4addr: use 4-address frames * @macaddr: address to use for this virtual interface. * If this parameter is set to zero address the driver may * determine the address as needed. * This feature is only fully supported by drivers that enable the * %NL80211_FEATURE_MAC_ON_CREATE flag. Others may support creating ** only p2p devices with specified MAC. * @vht_mumimo_groups: MU-MIMO groupID, used for monitoring MU-MIMO packets * belonging to that MU-MIMO groupID; %NULL if not changed * @vht_mumimo_follow_addr: MU-MIMO follow address, used for monitoring * MU-MIMO packets going to the specified station; %NULL if not changed */ struct vif_params { u32 flags; int use_4addr; u8 macaddr[ETH_ALEN]; const u8 *vht_mumimo_groups; const u8 *vht_mumimo_follow_addr; }; /** * struct key_params - key information * * Information about a key * * @key: key material * @key_len: length of key material * @cipher: cipher suite selector * @seq: sequence counter (IV/PN) for TKIP and CCMP keys, only used * with the get_key() callback, must be in little endian, * length given by @seq_len. * @seq_len: length of @seq. * @vlan_id: vlan_id for VLAN group key (if nonzero) * @mode: key install mode (RX_TX, NO_TX or SET_TX) */ struct key_params { const u8 *key; const u8 *seq; int key_len; int seq_len; u16 vlan_id; u32 cipher; enum nl80211_key_mode mode; }; /** * struct cfg80211_chan_def - channel definition * @chan: the (control) channel * @width: channel width * @center_freq1: center frequency of first segment * @center_freq2: center frequency of second segment * (only with 80+80 MHz) * @edmg: define the EDMG channels configuration. * If edmg is requested (i.e. the .channels member is non-zero), * chan will define the primary channel and all other * parameters are ignored. * @freq1_offset: offset from @center_freq1, in KHz */ struct cfg80211_chan_def { struct ieee80211_channel *chan; enum nl80211_chan_width width; u32 center_freq1; u32 center_freq2; struct ieee80211_edmg edmg; u16 freq1_offset; }; /* * cfg80211_bitrate_mask - masks for bitrate control */ struct cfg80211_bitrate_mask { struct { u32 legacy; u8 ht_mcs[IEEE80211_HT_MCS_MASK_LEN]; u16 vht_mcs[NL80211_VHT_NSS_MAX]; u16 he_mcs[NL80211_HE_NSS_MAX]; enum nl80211_txrate_gi gi; enum nl80211_he_gi he_gi; enum nl80211_he_ltf he_ltf; } control[NUM_NL80211_BANDS]; }; /** * struct cfg80211_tid_cfg - TID specific configuration * @config_override: Flag to notify driver to reset TID configuration * of the peer. * @tids: bitmap of TIDs to modify * @mask: bitmap of attributes indicating which parameter changed, * similar to &nl80211_tid_config_supp. * @noack: noack configuration value for the TID * @retry_long: retry count value * @retry_short: retry count value * @ampdu: Enable/Disable MPDU aggregation * @rtscts: Enable/Disable RTS/CTS * @amsdu: Enable/Disable MSDU aggregation * @txrate_type: Tx bitrate mask type * @txrate_mask: Tx bitrate to be applied for the TID */ struct cfg80211_tid_cfg { bool config_override; u8 tids; u64 mask; enum nl80211_tid_config noack; u8 retry_long, retry_short; enum nl80211_tid_config ampdu; enum nl80211_tid_config rtscts; enum nl80211_tid_config amsdu; enum nl80211_tx_rate_setting txrate_type; struct cfg80211_bitrate_mask txrate_mask; }; /** * struct cfg80211_tid_config - TID configuration * @peer: Station's MAC address * @n_tid_conf: Number of TID specific configurations to be applied * @tid_conf: Configuration change info */ struct cfg80211_tid_config { const u8 *peer; u32 n_tid_conf; struct cfg80211_tid_cfg tid_conf[]; }; /** * cfg80211_get_chandef_type - return old channel type from chandef * @chandef: the channel definition * * Return: The old channel type (NOHT, HT20, HT40+/-) from a given * chandef, which must have a bandwidth allowing this conversion. */ static inline enum nl80211_channel_type cfg80211_get_chandef_type(const struct cfg80211_chan_def *chandef) { switch (chandef->width) { case NL80211_CHAN_WIDTH_20_NOHT: return NL80211_CHAN_NO_HT; case NL80211_CHAN_WIDTH_20: return NL80211_CHAN_HT20; case NL80211_CHAN_WIDTH_40: if (chandef->center_freq1 > chandef->chan->center_freq) return NL80211_CHAN_HT40PLUS; return NL80211_CHAN_HT40MINUS; default: WARN_ON(1); return NL80211_CHAN_NO_HT; } } /** * cfg80211_chandef_create - create channel definition using channel type * @chandef: the channel definition struct to fill * @channel: the control channel * @chantype: the channel type * * Given a channel type, create a channel definition. */ void cfg80211_chandef_create(struct cfg80211_chan_def *chandef, struct ieee80211_channel *channel, enum nl80211_channel_type chantype); /** * cfg80211_chandef_identical - check if two channel definitions are identical * @chandef1: first channel definition * @chandef2: second channel definition * * Return: %true if the channels defined by the channel definitions are * identical, %false otherwise. */ static inline bool cfg80211_chandef_identical(const struct cfg80211_chan_def *chandef1, const struct cfg80211_chan_def *chandef2) { return (chandef1->chan == chandef2->chan && chandef1->width == chandef2->width && chandef1->center_freq1 == chandef2->center_freq1 && chandef1->freq1_offset == chandef2->freq1_offset && chandef1->center_freq2 == chandef2->center_freq2); } /** * cfg80211_chandef_is_edmg - check if chandef represents an EDMG channel * * @chandef: the channel definition * * Return: %true if EDMG defined, %false otherwise. */ static inline bool cfg80211_chandef_is_edmg(const struct cfg80211_chan_def *chandef) { return chandef->edmg.channels || chandef->edmg.bw_config; } /** * cfg80211_chandef_compatible - check if two channel definitions are compatible * @chandef1: first channel definition * @chandef2: second channel definition * * Return: %NULL if the given channel definitions are incompatible, * chandef1 or chandef2 otherwise. */ const struct cfg80211_chan_def * cfg80211_chandef_compatible(const struct cfg80211_chan_def *chandef1, const struct cfg80211_chan_def *chandef2); /** * cfg80211_chandef_valid - check if a channel definition is valid * @chandef: the channel definition to check * Return: %true if the channel definition is valid. %false otherwise. */ bool cfg80211_chandef_valid(const struct cfg80211_chan_def *chandef); /** * cfg80211_chandef_usable - check if secondary channels can be used * @wiphy: the wiphy to validate against * @chandef: the channel definition to check * @prohibited_flags: the regulatory channel flags that must not be set * Return: %true if secondary channels are usable. %false otherwise. */ bool cfg80211_chandef_usable(struct wiphy *wiphy, const struct cfg80211_chan_def *chandef, u32 prohibited_flags); /** * cfg80211_chandef_dfs_required - checks if radar detection is required * @wiphy: the wiphy to validate against * @chandef: the channel definition to check * @iftype: the interface type as specified in &enum nl80211_iftype * Returns: * 1 if radar detection is required, 0 if it is not, < 0 on error */ int cfg80211_chandef_dfs_required(struct wiphy *wiphy, const struct cfg80211_chan_def *chandef, enum nl80211_iftype iftype); /** * ieee80211_chandef_rate_flags - returns rate flags for a channel * * In some channel types, not all rates may be used - for example CCK * rates may not be used in 5/10 MHz channels. * * @chandef: channel definition for the channel * * Returns: rate flags which apply for this channel */ static inline enum ieee80211_rate_flags ieee80211_chandef_rate_flags(struct cfg80211_chan_def *chandef) { switch (chandef->width) { case NL80211_CHAN_WIDTH_5: return IEEE80211_RATE_SUPPORTS_5MHZ; case NL80211_CHAN_WIDTH_10: return IEEE80211_RATE_SUPPORTS_10MHZ; default: break; } return 0; } /** * ieee80211_chandef_max_power - maximum transmission power for the chandef * * In some regulations, the transmit power may depend on the configured channel * bandwidth which may be defined as dBm/MHz. This function returns the actual * max_power for non-standard (20 MHz) channels. * * @chandef: channel definition for the channel * * Returns: maximum allowed transmission power in dBm for the chandef */ static inline int ieee80211_chandef_max_power(struct cfg80211_chan_def *chandef) { switch (chandef->width) { case NL80211_CHAN_WIDTH_5: return min(chandef->chan->max_reg_power - 6, chandef->chan->max_power); case NL80211_CHAN_WIDTH_10: return min(chandef->chan->max_reg_power - 3, chandef->chan->max_power); default: break; } return chandef->chan->max_power; } /** * enum survey_info_flags - survey information flags * * @SURVEY_INFO_NOISE_DBM: noise (in dBm) was filled in * @SURVEY_INFO_IN_USE: channel is currently being used * @SURVEY_INFO_TIME: active time (in ms) was filled in * @SURVEY_INFO_TIME_BUSY: busy time was filled in * @SURVEY_INFO_TIME_EXT_BUSY: extension channel busy time was filled in * @SURVEY_INFO_TIME_RX: receive time was filled in * @SURVEY_INFO_TIME_TX: transmit time was filled in * @SURVEY_INFO_TIME_SCAN: scan time was filled in * @SURVEY_INFO_TIME_BSS_RX: local BSS receive time was filled in * * Used by the driver to indicate which info in &struct survey_info * it has filled in during the get_survey(). */ enum survey_info_flags { SURVEY_INFO_NOISE_DBM = BIT(0), SURVEY_INFO_IN_USE = BIT(1), SURVEY_INFO_TIME = BIT(2), SURVEY_INFO_TIME_BUSY = BIT(3), SURVEY_INFO_TIME_EXT_BUSY = BIT(4), SURVEY_INFO_TIME_RX = BIT(5), SURVEY_INFO_TIME_TX = BIT(6), SURVEY_INFO_TIME_SCAN = BIT(7), SURVEY_INFO_TIME_BSS_RX = BIT(8), }; /** * struct survey_info - channel survey response * * @channel: the channel this survey record reports, may be %NULL for a single * record to report global statistics * @filled: bitflag of flags from &enum survey_info_flags * @noise: channel noise in dBm. This and all following fields are * optional * @time: amount of time in ms the radio was turn on (on the channel) * @time_busy: amount of time the primary channel was sensed busy * @time_ext_busy: amount of time the extension channel was sensed busy * @time_rx: amount of time the radio spent receiving data * @time_tx: amount of time the radio spent transmitting data * @time_scan: amount of time the radio spent for scanning * @time_bss_rx: amount of time the radio spent receiving data on a local BSS * * Used by dump_survey() to report back per-channel survey information. * * This structure can later be expanded with things like * channel duty cycle etc. */ struct survey_info { struct ieee80211_channel *channel; u64 time; u64 time_busy; u64 time_ext_busy; u64 time_rx; u64 time_tx; u64 time_scan; u64 time_bss_rx; u32 filled; s8 noise; }; #define CFG80211_MAX_WEP_KEYS 4 /** * struct cfg80211_crypto_settings - Crypto settings * @wpa_versions: indicates which, if any, WPA versions are enabled * (from enum nl80211_wpa_versions) * @cipher_group: group key cipher suite (or 0 if unset) * @n_ciphers_pairwise: number of AP supported unicast ciphers * @ciphers_pairwise: unicast key cipher suites * @n_akm_suites: number of AKM suites * @akm_suites: AKM suites * @control_port: Whether user space controls IEEE 802.1X port, i.e., * sets/clears %NL80211_STA_FLAG_AUTHORIZED. If true, the driver is * required to assume that the port is unauthorized until authorized by * user space. Otherwise, port is marked authorized by default. * @control_port_ethertype: the control port protocol that should be * allowed through even on unauthorized ports * @control_port_no_encrypt: TRUE to prevent encryption of control port * protocol frames. * @control_port_over_nl80211: TRUE if userspace expects to exchange control * port frames over NL80211 instead of the network interface. * @control_port_no_preauth: disables pre-auth rx over the nl80211 control * port for mac80211 * @wep_keys: static WEP keys, if not NULL points to an array of * CFG80211_MAX_WEP_KEYS WEP keys * @wep_tx_key: key index (0..3) of the default TX static WEP key * @psk: PSK (for devices supporting 4-way-handshake offload) * @sae_pwd: password for SAE authentication (for devices supporting SAE * offload) * @sae_pwd_len: length of SAE password (for devices supporting SAE offload) */ struct cfg80211_crypto_settings { u32 wpa_versions; u32 cipher_group; int n_ciphers_pairwise; u32 ciphers_pairwise[NL80211_MAX_NR_CIPHER_SUITES]; int n_akm_suites; u32 akm_suites[NL80211_MAX_NR_AKM_SUITES]; bool control_port; __be16 control_port_ethertype; bool control_port_no_encrypt; bool control_port_over_nl80211; bool control_port_no_preauth; struct key_params *wep_keys; int wep_tx_key; const u8 *psk; const u8 *sae_pwd; u8 sae_pwd_len; }; /** * struct cfg80211_beacon_data - beacon data * @head: head portion of beacon (before TIM IE) * or %NULL if not changed * @tail: tail portion of beacon (after TIM IE) * or %NULL if not changed * @head_len: length of @head * @tail_len: length of @tail * @beacon_ies: extra information element(s) to add into Beacon frames or %NULL * @beacon_ies_len: length of beacon_ies in octets * @proberesp_ies: extra information element(s) to add into Probe Response * frames or %NULL * @proberesp_ies_len: length of proberesp_ies in octets * @assocresp_ies: extra information element(s) to add into (Re)Association * Response frames or %NULL * @assocresp_ies_len: length of assocresp_ies in octets * @probe_resp_len: length of probe response template (@probe_resp) * @probe_resp: probe response template (AP mode only) * @ftm_responder: enable FTM responder functionality; -1 for no change * (which also implies no change in LCI/civic location data) * @lci: Measurement Report element content, starting with Measurement Token * (measurement type 8) * @civicloc: Measurement Report element content, starting with Measurement * Token (measurement type 11) * @lci_len: LCI data length * @civicloc_len: Civic location data length */ struct cfg80211_beacon_data { const u8 *head, *tail; const u8 *beacon_ies; const u8 *proberesp_ies; const u8 *assocresp_ies; const u8 *probe_resp; const u8 *lci; const u8 *civicloc; s8 ftm_responder; size_t head_len, tail_len; size_t beacon_ies_len; size_t proberesp_ies_len; size_t assocresp_ies_len; size_t probe_resp_len; size_t lci_len; size_t civicloc_len; }; struct mac_address { u8 addr[ETH_ALEN]; }; /** * struct cfg80211_acl_data - Access control list data * * @acl_policy: ACL policy to be applied on the station's * entry specified by mac_addr * @n_acl_entries: Number of MAC address entries passed * @mac_addrs: List of MAC addresses of stations to be used for ACL */ struct cfg80211_acl_data { enum nl80211_acl_policy acl_policy; int n_acl_entries; /* Keep it last */ struct mac_address mac_addrs[]; }; /** * struct cfg80211_fils_discovery - FILS discovery parameters from * IEEE Std 802.11ai-2016, Annex C.3 MIB detail. * * @min_interval: Minimum packet interval in TUs (0 - 10000) * @max_interval: Maximum packet interval in TUs (0 - 10000) * @tmpl_len: Template length * @tmpl: Template data for FILS discovery frame including the action * frame headers. */ struct cfg80211_fils_discovery { u32 min_interval; u32 max_interval; size_t tmpl_len; const u8 *tmpl; }; /** * struct cfg80211_unsol_bcast_probe_resp - Unsolicited broadcast probe * response parameters in 6GHz. * * @interval: Packet interval in TUs. Maximum allowed is 20 TU, as mentioned * in IEEE P802.11ax/D6.0 26.17.2.3.2 - AP behavior for fast passive * scanning * @tmpl_len: Template length * @tmpl: Template data for probe response */ struct cfg80211_unsol_bcast_probe_resp { u32 interval; size_t tmpl_len; const u8 *tmpl; }; /** * enum cfg80211_ap_settings_flags - AP settings flags * * Used by cfg80211_ap_settings * * @AP_SETTINGS_EXTERNAL_AUTH_SUPPORT: AP supports external authentication */ enum cfg80211_ap_settings_flags { AP_SETTINGS_EXTERNAL_AUTH_SUPPORT = BIT(0), }; /** * struct cfg80211_ap_settings - AP configuration * * Used to configure an AP interface. * * @chandef: defines the channel to use * @beacon: beacon data * @beacon_interval: beacon interval * @dtim_period: DTIM period * @ssid: SSID to be used in the BSS (note: may be %NULL if not provided from * user space) * @ssid_len: length of @ssid * @hidden_ssid: whether to hide the SSID in Beacon/Probe Response frames * @crypto: crypto settings * @privacy: the BSS uses privacy * @auth_type: Authentication type (algorithm) * @smps_mode: SMPS mode * @inactivity_timeout: time in seconds to determine station's inactivity. * @p2p_ctwindow: P2P CT Window * @p2p_opp_ps: P2P opportunistic PS * @acl: ACL configuration used by the drivers which has support for * MAC address based access control * @pbss: If set, start as a PCP instead of AP. Relevant for DMG * networks. * @beacon_rate: bitrate to be used for beacons * @ht_cap: HT capabilities (or %NULL if HT isn't enabled) * @vht_cap: VHT capabilities (or %NULL if VHT isn't enabled) * @he_cap: HE capabilities (or %NULL if HE isn't enabled) * @ht_required: stations must support HT * @vht_required: stations must support VHT * @twt_responder: Enable Target Wait Time * @he_required: stations must support HE * @flags: flags, as defined in enum cfg80211_ap_settings_flags * @he_obss_pd: OBSS Packet Detection settings * @he_bss_color: BSS Color settings * @he_oper: HE operation IE (or %NULL if HE isn't enabled) * @fils_discovery: FILS discovery transmission parameters * @unsol_bcast_probe_resp: Unsolicited broadcast probe response parameters */ struct cfg80211_ap_settings { struct cfg80211_chan_def chandef; struct cfg80211_beacon_data beacon; int beacon_interval, dtim_period; const u8 *ssid; size_t ssid_len; enum nl80211_hidden_ssid hidden_ssid; struct cfg80211_crypto_settings crypto; bool privacy; enum nl80211_auth_type auth_type; enum nl80211_smps_mode smps_mode; int inactivity_timeout; u8 p2p_ctwindow; bool p2p_opp_ps; const struct cfg80211_acl_data *acl; bool pbss; struct cfg80211_bitrate_mask beacon_rate; const struct ieee80211_ht_cap *ht_cap; const struct ieee80211_vht_cap *vht_cap; const struct ieee80211_he_cap_elem *he_cap; const struct ieee80211_he_operation *he_oper; bool ht_required, vht_required, he_required; bool twt_responder; u32 flags; struct ieee80211_he_obss_pd he_obss_pd; struct cfg80211_he_bss_color he_bss_color; struct cfg80211_fils_discovery fils_discovery; struct cfg80211_unsol_bcast_probe_resp unsol_bcast_probe_resp; }; /** * struct cfg80211_csa_settings - channel switch settings * * Used for channel switch * * @chandef: defines the channel to use after the switch * @beacon_csa: beacon data while performing the switch * @counter_offsets_beacon: offsets of the counters within the beacon (tail) * @counter_offsets_presp: offsets of the counters within the probe response * @n_counter_offsets_beacon: number of csa counters the beacon (tail) * @n_counter_offsets_presp: number of csa counters in the probe response * @beacon_after: beacon data to be used on the new channel * @radar_required: whether radar detection is required on the new channel * @block_tx: whether transmissions should be blocked while changing * @count: number of beacons until switch */ struct cfg80211_csa_settings { struct cfg80211_chan_def chandef; struct cfg80211_beacon_data beacon_csa; const u16 *counter_offsets_beacon; const u16 *counter_offsets_presp; unsigned int n_counter_offsets_beacon; unsigned int n_counter_offsets_presp; struct cfg80211_beacon_data beacon_after; bool radar_required; bool block_tx; u8 count; }; #define CFG80211_MAX_NUM_DIFFERENT_CHANNELS 10 /** * struct iface_combination_params - input parameters for interface combinations * * Used to pass interface combination parameters * * @num_different_channels: the number of different channels we want * to use for verification * @radar_detect: a bitmap where each bit corresponds to a channel * width where radar detection is needed, as in the definition of * &struct ieee80211_iface_combination.@radar_detect_widths * @iftype_num: array with the number of interfaces of each interface * type. The index is the interface type as specified in &enum * nl80211_iftype. * @new_beacon_int: set this to the beacon interval of a new interface * that's not operating yet, if such is to be checked as part of * the verification */ struct iface_combination_params { int num_different_channels; u8 radar_detect; int iftype_num[NUM_NL80211_IFTYPES]; u32 new_beacon_int; }; /** * enum station_parameters_apply_mask - station parameter values to apply * @STATION_PARAM_APPLY_UAPSD: apply new uAPSD parameters (uapsd_queues, max_sp) * @STATION_PARAM_APPLY_CAPABILITY: apply new capability * @STATION_PARAM_APPLY_PLINK_STATE: apply new plink state * * Not all station parameters have in-band "no change" signalling, * for those that don't these flags will are used. */ enum station_parameters_apply_mask { STATION_PARAM_APPLY_UAPSD = BIT(0), STATION_PARAM_APPLY_CAPABILITY = BIT(1), STATION_PARAM_APPLY_PLINK_STATE = BIT(2), STATION_PARAM_APPLY_STA_TXPOWER = BIT(3), }; /** * struct sta_txpwr - station txpower configuration * * Used to configure txpower for station. * * @power: tx power (in dBm) to be used for sending data traffic. If tx power * is not provided, the default per-interface tx power setting will be * overriding. Driver should be picking up the lowest tx power, either tx * power per-interface or per-station. * @type: In particular if TPC %type is NL80211_TX_POWER_LIMITED then tx power * will be less than or equal to specified from userspace, whereas if TPC * %type is NL80211_TX_POWER_AUTOMATIC then it indicates default tx power. * NL80211_TX_POWER_FIXED is not a valid configuration option for * per peer TPC. */ struct sta_txpwr { s16 power; enum nl80211_tx_power_setting type; }; /** * struct station_parameters - station parameters * * Used to change and create a new station. * * @vlan: vlan interface station should belong to * @supported_rates: supported rates in IEEE 802.11 format * (or NULL for no change) * @supported_rates_len: number of supported rates * @sta_flags_mask: station flags that changed * (bitmask of BIT(%NL80211_STA_FLAG_...)) * @sta_flags_set: station flags values * (bitmask of BIT(%NL80211_STA_FLAG_...)) * @listen_interval: listen interval or -1 for no change * @aid: AID or zero for no change * @vlan_id: VLAN ID for station (if nonzero) * @peer_aid: mesh peer AID or zero for no change * @plink_action: plink action to take * @plink_state: set the peer link state for a station * @ht_capa: HT capabilities of station * @vht_capa: VHT capabilities of station * @uapsd_queues: bitmap of queues configured for uapsd. same format * as the AC bitmap in the QoS info field * @max_sp: max Service Period. same format as the MAX_SP in the * QoS info field (but already shifted down) * @sta_modify_mask: bitmap indicating which parameters changed * (for those that don't have a natural "no change" value), * see &enum station_parameters_apply_mask * @local_pm: local link-specific mesh power save mode (no change when set * to unknown) * @capability: station capability * @ext_capab: extended capabilities of the station * @ext_capab_len: number of extended capabilities * @supported_channels: supported channels in IEEE 802.11 format * @supported_channels_len: number of supported channels * @supported_oper_classes: supported oper classes in IEEE 802.11 format * @supported_oper_classes_len: number of supported operating classes * @opmode_notif: operating mode field from Operating Mode Notification * @opmode_notif_used: information if operating mode field is used * @support_p2p_ps: information if station supports P2P PS mechanism * @he_capa: HE capabilities of station * @he_capa_len: the length of the HE capabilities * @airtime_weight: airtime scheduler weight for this station * @txpwr: transmit power for an associated station * @he_6ghz_capa: HE 6 GHz Band capabilities of station */ struct station_parameters { const u8 *supported_rates; struct net_device *vlan; u32 sta_flags_mask, sta_flags_set; u32 sta_modify_mask; int listen_interval; u16 aid; u16 vlan_id; u16 peer_aid; u8 supported_rates_len; u8 plink_action; u8 plink_state; const struct ieee80211_ht_cap *ht_capa; const struct ieee80211_vht_cap *vht_capa; u8 uapsd_queues; u8 max_sp; enum nl80211_mesh_power_mode local_pm; u16 capability; const u8 *ext_capab; u8 ext_capab_len; const u8 *supported_channels; u8 supported_channels_len; const u8 *supported_oper_classes; u8 supported_oper_classes_len; u8 opmode_notif; bool opmode_notif_used; int support_p2p_ps; const struct ieee80211_he_cap_elem *he_capa; u8 he_capa_len; u16 airtime_weight; struct sta_txpwr txpwr; const struct ieee80211_he_6ghz_capa *he_6ghz_capa; }; /** * struct station_del_parameters - station deletion parameters * * Used to delete a station entry (or all stations). * * @mac: MAC address of the station to remove or NULL to remove all stations * @subtype: Management frame subtype to use for indicating removal * (10 = Disassociation, 12 = Deauthentication) * @reason_code: Reason code for the Disassociation/Deauthentication frame */ struct station_del_parameters { const u8 *mac; u8 subtype; u16 reason_code; }; /** * enum cfg80211_station_type - the type of station being modified * @CFG80211_STA_AP_CLIENT: client of an AP interface * @CFG80211_STA_AP_CLIENT_UNASSOC: client of an AP interface that is still * unassociated (update properties for this type of client is permitted) * @CFG80211_STA_AP_MLME_CLIENT: client of an AP interface that has * the AP MLME in the device * @CFG80211_STA_AP_STA: AP station on managed interface * @CFG80211_STA_IBSS: IBSS station * @CFG80211_STA_TDLS_PEER_SETUP: TDLS peer on managed interface (dummy entry * while TDLS setup is in progress, it moves out of this state when * being marked authorized; use this only if TDLS with external setup is * supported/used) * @CFG80211_STA_TDLS_PEER_ACTIVE: TDLS peer on managed interface (active * entry that is operating, has been marked authorized by userspace) * @CFG80211_STA_MESH_PEER_KERNEL: peer on mesh interface (kernel managed) * @CFG80211_STA_MESH_PEER_USER: peer on mesh interface (user managed) */ enum cfg80211_station_type { CFG80211_STA_AP_CLIENT, CFG80211_STA_AP_CLIENT_UNASSOC, CFG80211_STA_AP_MLME_CLIENT, CFG80211_STA_AP_STA, CFG80211_STA_IBSS, CFG80211_STA_TDLS_PEER_SETUP, CFG80211_STA_TDLS_PEER_ACTIVE, CFG80211_STA_MESH_PEER_KERNEL, CFG80211_STA_MESH_PEER_USER, }; /** * cfg80211_check_station_change - validate parameter changes * @wiphy: the wiphy this operates on * @params: the new parameters for a station * @statype: the type of station being modified * * Utility function for the @change_station driver method. Call this function * with the appropriate station type looking up the station (and checking that * it exists). It will verify whether the station change is acceptable, and if * not will return an error code. Note that it may modify the parameters for * backward compatibility reasons, so don't use them before calling this. */ int cfg80211_check_station_change(struct wiphy *wiphy, struct station_parameters *params, enum cfg80211_station_type statype); /** * enum rate_info_flags - bitrate info flags * * Used by the driver to indicate the specific rate transmission * type for 802.11n transmissions. * * @RATE_INFO_FLAGS_MCS: mcs field filled with HT MCS * @RATE_INFO_FLAGS_VHT_MCS: mcs field filled with VHT MCS * @RATE_INFO_FLAGS_SHORT_GI: 400ns guard interval * @RATE_INFO_FLAGS_DMG: 60GHz MCS * @RATE_INFO_FLAGS_HE_MCS: HE MCS information * @RATE_INFO_FLAGS_EDMG: 60GHz MCS in EDMG mode */ enum rate_info_flags { RATE_INFO_FLAGS_MCS = BIT(0), RATE_INFO_FLAGS_VHT_MCS = BIT(1), RATE_INFO_FLAGS_SHORT_GI = BIT(2), RATE_INFO_FLAGS_DMG = BIT(3), RATE_INFO_FLAGS_HE_MCS = BIT(4), RATE_INFO_FLAGS_EDMG = BIT(5), }; /** * enum rate_info_bw - rate bandwidth information * * Used by the driver to indicate the rate bandwidth. * * @RATE_INFO_BW_5: 5 MHz bandwidth * @RATE_INFO_BW_10: 10 MHz bandwidth * @RATE_INFO_BW_20: 20 MHz bandwidth * @RATE_INFO_BW_40: 40 MHz bandwidth * @RATE_INFO_BW_80: 80 MHz bandwidth * @RATE_INFO_BW_160: 160 MHz bandwidth * @RATE_INFO_BW_HE_RU: bandwidth determined by HE RU allocation */ enum rate_info_bw { RATE_INFO_BW_20 = 0, RATE_INFO_BW_5, RATE_INFO_BW_10, RATE_INFO_BW_40, RATE_INFO_BW_80, RATE_INFO_BW_160, RATE_INFO_BW_HE_RU, }; /** * struct rate_info - bitrate information * * Information about a receiving or transmitting bitrate * * @flags: bitflag of flags from &enum rate_info_flags * @mcs: mcs index if struct describes an HT/VHT/HE rate * @legacy: bitrate in 100kbit/s for 802.11abg * @nss: number of streams (VHT & HE only) * @bw: bandwidth (from &enum rate_info_bw) * @he_gi: HE guard interval (from &enum nl80211_he_gi) * @he_dcm: HE DCM value * @he_ru_alloc: HE RU allocation (from &enum nl80211_he_ru_alloc, * only valid if bw is %RATE_INFO_BW_HE_RU) * @n_bonded_ch: In case of EDMG the number of bonded channels (1-4) */ struct rate_info { u8 flags; u8 mcs; u16 legacy; u8 nss; u8 bw; u8 he_gi; u8 he_dcm; u8 he_ru_alloc; u8 n_bonded_ch; }; /** * enum bss_param_flags - bitrate info flags * * Used by the driver to indicate the specific rate transmission * type for 802.11n transmissions. * * @BSS_PARAM_FLAGS_CTS_PROT: whether CTS protection is enabled * @BSS_PARAM_FLAGS_SHORT_PREAMBLE: whether short preamble is enabled * @BSS_PARAM_FLAGS_SHORT_SLOT_TIME: whether short slot time is enabled */ enum bss_param_flags { BSS_PARAM_FLAGS_CTS_PROT = 1<<0, BSS_PARAM_FLAGS_SHORT_PREAMBLE = 1<<1, BSS_PARAM_FLAGS_SHORT_SLOT_TIME = 1<<2, }; /** * struct sta_bss_parameters - BSS parameters for the attached station * * Information about the currently associated BSS * * @flags: bitflag of flags from &enum bss_param_flags * @dtim_period: DTIM period for the BSS * @beacon_interval: beacon interval */ struct sta_bss_parameters { u8 flags; u8 dtim_period; u16 beacon_interval; }; /** * struct cfg80211_txq_stats - TXQ statistics for this TID * @filled: bitmap of flags using the bits of &enum nl80211_txq_stats to * indicate the relevant values in this struct are filled * @backlog_bytes: total number of bytes currently backlogged * @backlog_packets: total number of packets currently backlogged * @flows: number of new flows seen * @drops: total number of packets dropped * @ecn_marks: total number of packets marked with ECN CE * @overlimit: number of drops due to queue space overflow * @overmemory: number of drops due to memory limit overflow * @collisions: number of hash collisions * @tx_bytes: total number of bytes dequeued * @tx_packets: total number of packets dequeued * @max_flows: maximum number of flows supported */ struct cfg80211_txq_stats { u32 filled; u32 backlog_bytes; u32 backlog_packets; u32 flows; u32 drops; u32 ecn_marks; u32 overlimit; u32 overmemory; u32 collisions; u32 tx_bytes; u32 tx_packets; u32 max_flows; }; /** * struct cfg80211_tid_stats - per-TID statistics * @filled: bitmap of flags using the bits of &enum nl80211_tid_stats to * indicate the relevant values in this struct are filled * @rx_msdu: number of received MSDUs * @tx_msdu: number of (attempted) transmitted MSDUs * @tx_msdu_retries: number of retries (not counting the first) for * transmitted MSDUs * @tx_msdu_failed: number of failed transmitted MSDUs * @txq_stats: TXQ statistics */ struct cfg80211_tid_stats { u32 filled; u64 rx_msdu; u64 tx_msdu; u64 tx_msdu_retries; u64 tx_msdu_failed; struct cfg80211_txq_stats txq_stats; }; #define IEEE80211_MAX_CHAINS 4 /** * struct station_info - station information * * Station information filled by driver for get_station() and dump_station. * * @filled: bitflag of flags using the bits of &enum nl80211_sta_info to * indicate the relevant values in this struct for them * @connected_time: time(in secs) since a station is last connected * @inactive_time: time since last station activity (tx/rx) in milliseconds * @assoc_at: bootime (ns) of the last association * @rx_bytes: bytes (size of MPDUs) received from this station * @tx_bytes: bytes (size of MPDUs) transmitted to this station * @llid: mesh local link id * @plid: mesh peer link id * @plink_state: mesh peer link state * @signal: The signal strength, type depends on the wiphy's signal_type. * For CFG80211_SIGNAL_TYPE_MBM, value is expressed in _dBm_. * @signal_avg: Average signal strength, type depends on the wiphy's signal_type. * For CFG80211_SIGNAL_TYPE_MBM, value is expressed in _dBm_. * @chains: bitmask for filled values in @chain_signal, @chain_signal_avg * @chain_signal: per-chain signal strength of last received packet in dBm * @chain_signal_avg: per-chain signal strength average in dBm * @txrate: current unicast bitrate from this station * @rxrate: current unicast bitrate to this station * @rx_packets: packets (MSDUs & MMPDUs) received from this station * @tx_packets: packets (MSDUs & MMPDUs) transmitted to this station * @tx_retries: cumulative retry counts (MPDUs) * @tx_failed: number of failed transmissions (MPDUs) (retries exceeded, no ACK) * @rx_dropped_misc: Dropped for un-specified reason. * @bss_param: current BSS parameters * @generation: generation number for nl80211 dumps. * This number should increase every time the list of stations * changes, i.e. when a station is added or removed, so that * userspace can tell whether it got a consistent snapshot. * @assoc_req_ies: IEs from (Re)Association Request. * This is used only when in AP mode with drivers that do not use * user space MLME/SME implementation. The information is provided for * the cfg80211_new_sta() calls to notify user space of the IEs. * @assoc_req_ies_len: Length of assoc_req_ies buffer in octets. * @sta_flags: station flags mask & values * @beacon_loss_count: Number of times beacon loss event has triggered. * @t_offset: Time offset of the station relative to this host. * @local_pm: local mesh STA power save mode * @peer_pm: peer mesh STA power save mode * @nonpeer_pm: non-peer mesh STA power save mode * @expected_throughput: expected throughput in kbps (including 802.11 headers) * towards this station. * @rx_beacon: number of beacons received from this peer * @rx_beacon_signal_avg: signal strength average (in dBm) for beacons received * from this peer * @connected_to_gate: true if mesh STA has a path to mesh gate * @rx_duration: aggregate PPDU duration(usecs) for all the frames from a peer * @tx_duration: aggregate PPDU duration(usecs) for all the frames to a peer * @airtime_weight: current airtime scheduling weight * @pertid: per-TID statistics, see &struct cfg80211_tid_stats, using the last * (IEEE80211_NUM_TIDS) index for MSDUs not encapsulated in QoS-MPDUs. * Note that this doesn't use the @filled bit, but is used if non-NULL. * @ack_signal: signal strength (in dBm) of the last ACK frame. * @avg_ack_signal: average rssi value of ack packet for the no of msdu's has * been sent. * @rx_mpdu_count: number of MPDUs received from this station * @fcs_err_count: number of packets (MPDUs) received from this station with * an FCS error. This counter should be incremented only when TA of the * received packet with an FCS error matches the peer MAC address. * @airtime_link_metric: mesh airtime link metric. * @connected_to_as: true if mesh STA has a path to authentication server */ struct station_info { u64 filled; u32 connected_time; u32 inactive_time; u64 assoc_at; u64 rx_bytes; u64 tx_bytes; u16 llid; u16 plid; u8 plink_state; s8 signal; s8 signal_avg; u8 chains; s8 chain_signal[IEEE80211_MAX_CHAINS]; s8 chain_signal_avg[IEEE80211_MAX_CHAINS]; struct rate_info txrate; struct rate_info rxrate; u32 rx_packets; u32 tx_packets; u32 tx_retries; u32 tx_failed; u32 rx_dropped_misc; struct sta_bss_parameters bss_param; struct nl80211_sta_flag_update sta_flags; int generation; const u8 *assoc_req_ies; size_t assoc_req_ies_len; u32 beacon_loss_count; s64 t_offset; enum nl80211_mesh_power_mode local_pm; enum nl80211_mesh_power_mode peer_pm; enum nl80211_mesh_power_mode nonpeer_pm; u32 expected_throughput; u64 tx_duration; u64 rx_duration; u64 rx_beacon; u8 rx_beacon_signal_avg; u8 connected_to_gate; struct cfg80211_tid_stats *pertid; s8 ack_signal; s8 avg_ack_signal; u16 airtime_weight; u32 rx_mpdu_count; u32 fcs_err_count; u32 airtime_link_metric; u8 connected_to_as; }; #if IS_ENABLED(CONFIG_CFG80211) /** * cfg80211_get_station - retrieve information about a given station * @dev: the device where the station is supposed to be connected to * @mac_addr: the mac address of the station of interest * @sinfo: pointer to the structure to fill with the information * * Returns 0 on success and sinfo is filled with the available information * otherwise returns a negative error code and the content of sinfo has to be * considered undefined. */ int cfg80211_get_station(struct net_device *dev, const u8 *mac_addr, struct station_info *sinfo); #else static inline int cfg80211_get_station(struct net_device *dev, const u8 *mac_addr, struct station_info *sinfo) { return -ENOENT; } #endif /** * enum monitor_flags - monitor flags * * Monitor interface configuration flags. Note that these must be the bits * according to the nl80211 flags. * * @MONITOR_FLAG_CHANGED: set if the flags were changed * @MONITOR_FLAG_FCSFAIL: pass frames with bad FCS * @MONITOR_FLAG_PLCPFAIL: pass frames with bad PLCP * @MONITOR_FLAG_CONTROL: pass control frames * @MONITOR_FLAG_OTHER_BSS: disable BSSID filtering * @MONITOR_FLAG_COOK_FRAMES: report frames after processing * @MONITOR_FLAG_ACTIVE: active monitor, ACKs frames on its MAC address */ enum monitor_flags { MONITOR_FLAG_CHANGED = 1<<__NL80211_MNTR_FLAG_INVALID, MONITOR_FLAG_FCSFAIL = 1<<NL80211_MNTR_FLAG_FCSFAIL, MONITOR_FLAG_PLCPFAIL = 1<<NL80211_MNTR_FLAG_PLCPFAIL, MONITOR_FLAG_CONTROL = 1<<NL80211_MNTR_FLAG_CONTROL, MONITOR_FLAG_OTHER_BSS = 1<<NL80211_MNTR_FLAG_OTHER_BSS, MONITOR_FLAG_COOK_FRAMES = 1<<NL80211_MNTR_FLAG_COOK_FRAMES, MONITOR_FLAG_ACTIVE = 1<<NL80211_MNTR_FLAG_ACTIVE, }; /** * enum mpath_info_flags - mesh path information flags * * Used by the driver to indicate which info in &struct mpath_info it has filled * in during get_station() or dump_station(). * * @MPATH_INFO_FRAME_QLEN: @frame_qlen filled * @MPATH_INFO_SN: @sn filled * @MPATH_INFO_METRIC: @metric filled * @MPATH_INFO_EXPTIME: @exptime filled * @MPATH_INFO_DISCOVERY_TIMEOUT: @discovery_timeout filled * @MPATH_INFO_DISCOVERY_RETRIES: @discovery_retries filled * @MPATH_INFO_FLAGS: @flags filled * @MPATH_INFO_HOP_COUNT: @hop_count filled * @MPATH_INFO_PATH_CHANGE: @path_change_count filled */ enum mpath_info_flags { MPATH_INFO_FRAME_QLEN = BIT(0), MPATH_INFO_SN = BIT(1), MPATH_INFO_METRIC = BIT(2), MPATH_INFO_EXPTIME = BIT(3), MPATH_INFO_DISCOVERY_TIMEOUT = BIT(4), MPATH_INFO_DISCOVERY_RETRIES = BIT(5), MPATH_INFO_FLAGS = BIT(6), MPATH_INFO_HOP_COUNT = BIT(7), MPATH_INFO_PATH_CHANGE = BIT(8), }; /** * struct mpath_info - mesh path information * * Mesh path information filled by driver for get_mpath() and dump_mpath(). * * @filled: bitfield of flags from &enum mpath_info_flags * @frame_qlen: number of queued frames for this destination * @sn: target sequence number * @metric: metric (cost) of this mesh path * @exptime: expiration time for the mesh path from now, in msecs * @flags: mesh path flags * @discovery_timeout: total mesh path discovery timeout, in msecs * @discovery_retries: mesh path discovery retries * @generation: generation number for nl80211 dumps. * This number should increase every time the list of mesh paths * changes, i.e. when a station is added or removed, so that * userspace can tell whether it got a consistent snapshot. * @hop_count: hops to destination * @path_change_count: total number of path changes to destination */ struct mpath_info { u32 filled; u32 frame_qlen; u32 sn; u32 metric; u32 exptime; u32 discovery_timeout; u8 discovery_retries; u8 flags; u8 hop_count; u32 path_change_count; int generation; }; /** * struct bss_parameters - BSS parameters * * Used to change BSS parameters (mainly for AP mode). * * @use_cts_prot: Whether to use CTS protection * (0 = no, 1 = yes, -1 = do not change) * @use_short_preamble: Whether the use of short preambles is allowed * (0 = no, 1 = yes, -1 = do not change) * @use_short_slot_time: Whether the use of short slot time is allowed * (0 = no, 1 = yes, -1 = do not change) * @basic_rates: basic rates in IEEE 802.11 format * (or NULL for no change) * @basic_rates_len: number of basic rates * @ap_isolate: do not forward packets between connected stations * (0 = no, 1 = yes, -1 = do not change) * @ht_opmode: HT Operation mode * (u16 = opmode, -1 = do not change) * @p2p_ctwindow: P2P CT Window (-1 = no change) * @p2p_opp_ps: P2P opportunistic PS (-1 = no change) */ struct bss_parameters { int use_cts_prot; int use_short_preamble; int use_short_slot_time; const u8 *basic_rates; u8 basic_rates_len; int ap_isolate; int ht_opmode; s8 p2p_ctwindow, p2p_opp_ps; }; /** * struct mesh_config - 802.11s mesh configuration * * These parameters can be changed while the mesh is active. * * @dot11MeshRetryTimeout: the initial retry timeout in millisecond units used * by the Mesh Peering Open message * @dot11MeshConfirmTimeout: the initial retry timeout in millisecond units * used by the Mesh Peering Open message * @dot11MeshHoldingTimeout: the confirm timeout in millisecond units used by * the mesh peering management to close a mesh peering * @dot11MeshMaxPeerLinks: the maximum number of peer links allowed on this * mesh interface * @dot11MeshMaxRetries: the maximum number of peer link open retries that can * be sent to establish a new peer link instance in a mesh * @dot11MeshTTL: the value of TTL field set at a source mesh STA * @element_ttl: the value of TTL field set at a mesh STA for path selection * elements * @auto_open_plinks: whether we should automatically open peer links when we * detect compatible mesh peers * @dot11MeshNbrOffsetMaxNeighbor: the maximum number of neighbors to * synchronize to for 11s default synchronization method * @dot11MeshHWMPmaxPREQretries: the number of action frames containing a PREQ * that an originator mesh STA can send to a particular path target * @path_refresh_time: how frequently to refresh mesh paths in milliseconds * @min_discovery_timeout: the minimum length of time to wait until giving up on * a path discovery in milliseconds * @dot11MeshHWMPactivePathTimeout: the time (in TUs) for which mesh STAs * receiving a PREQ shall consider the forwarding information from the * root to be valid. (TU = time unit) * @dot11MeshHWMPpreqMinInterval: the minimum interval of time (in TUs) during * which a mesh STA can send only one action frame containing a PREQ * element * @dot11MeshHWMPperrMinInterval: the minimum interval of time (in TUs) during * which a mesh STA can send only one Action frame containing a PERR * element * @dot11MeshHWMPnetDiameterTraversalTime: the interval of time (in TUs) that * it takes for an HWMP information element to propagate across the mesh * @dot11MeshHWMPRootMode: the configuration of a mesh STA as root mesh STA * @dot11MeshHWMPRannInterval: the interval of time (in TUs) between root * announcements are transmitted * @dot11MeshGateAnnouncementProtocol: whether to advertise that this mesh * station has access to a broader network beyond the MBSS. (This is * missnamed in draft 12.0: dot11MeshGateAnnouncementProtocol set to true * only means that the station will announce others it's a mesh gate, but * not necessarily using the gate announcement protocol. Still keeping the * same nomenclature to be in sync with the spec) * @dot11MeshForwarding: whether the Mesh STA is forwarding or non-forwarding * entity (default is TRUE - forwarding entity) * @rssi_threshold: the threshold for average signal strength of candidate * station to establish a peer link * @ht_opmode: mesh HT protection mode * * @dot11MeshHWMPactivePathToRootTimeout: The time (in TUs) for which mesh STAs * receiving a proactive PREQ shall consider the forwarding information to * the root mesh STA to be valid. * * @dot11MeshHWMProotInterval: The interval of time (in TUs) between proactive * PREQs are transmitted. * @dot11MeshHWMPconfirmationInterval: The minimum interval of time (in TUs) * during which a mesh STA can send only one Action frame containing * a PREQ element for root path confirmation. * @power_mode: The default mesh power save mode which will be the initial * setting for new peer links. * @dot11MeshAwakeWindowDuration: The duration in TUs the STA will remain awake * after transmitting its beacon. * @plink_timeout: If no tx activity is seen from a STA we've established * peering with for longer than this time (in seconds), then remove it * from the STA's list of peers. Default is 30 minutes. * @dot11MeshConnectedToMeshGate: if set to true, advertise that this STA is * connected to a mesh gate in mesh formation info. If false, the * value in mesh formation is determined by the presence of root paths * in the mesh path table * @dot11MeshNolearn: Try to avoid multi-hop path discovery (e.g. PREQ/PREP * for HWMP) if the destination is a direct neighbor. Note that this might * not be the optimal decision as a multi-hop route might be better. So * if using this setting you will likely also want to disable * dot11MeshForwarding and use another mesh routing protocol on top. */ struct mesh_config { u16 dot11MeshRetryTimeout; u16 dot11MeshConfirmTimeout; u16 dot11MeshHoldingTimeout; u16 dot11MeshMaxPeerLinks; u8 dot11MeshMaxRetries; u8 dot11MeshTTL; u8 element_ttl; bool auto_open_plinks; u32 dot11MeshNbrOffsetMaxNeighbor; u8 dot11MeshHWMPmaxPREQretries; u32 path_refresh_time; u16 min_discovery_timeout; u32 dot11MeshHWMPactivePathTimeout; u16 dot11MeshHWMPpreqMinInterval; u16 dot11MeshHWMPperrMinInterval; u16 dot11MeshHWMPnetDiameterTraversalTime; u8 dot11MeshHWMPRootMode; bool dot11MeshConnectedToMeshGate; bool dot11MeshConnectedToAuthServer; u16 dot11MeshHWMPRannInterval; bool dot11MeshGateAnnouncementProtocol; bool dot11MeshForwarding; s32 rssi_threshold; u16 ht_opmode; u32 dot11MeshHWMPactivePathToRootTimeout; u16 dot11MeshHWMProotInterval; u16 dot11MeshHWMPconfirmationInterval; enum nl80211_mesh_power_mode power_mode; u16 dot11MeshAwakeWindowDuration; u32 plink_timeout; bool dot11MeshNolearn; }; /** * struct mesh_setup - 802.11s mesh setup configuration * @chandef: defines the channel to use * @mesh_id: the mesh ID * @mesh_id_len: length of the mesh ID, at least 1 and at most 32 bytes * @sync_method: which synchronization method to use * @path_sel_proto: which path selection protocol to use * @path_metric: which metric to use * @auth_id: which authentication method this mesh is using * @ie: vendor information elements (optional) * @ie_len: length of vendor information elements * @is_authenticated: this mesh requires authentication * @is_secure: this mesh uses security * @user_mpm: userspace handles all MPM functions * @dtim_period: DTIM period to use * @beacon_interval: beacon interval to use * @mcast_rate: multicat rate for Mesh Node [6Mbps is the default for 802.11a] * @basic_rates: basic rates to use when creating the mesh * @beacon_rate: bitrate to be used for beacons * @userspace_handles_dfs: whether user space controls DFS operation, i.e. * changes the channel when a radar is detected. This is required * to operate on DFS channels. * @control_port_over_nl80211: TRUE if userspace expects to exchange control * port frames over NL80211 instead of the network interface. * * These parameters are fixed when the mesh is created. */ struct mesh_setup { struct cfg80211_chan_def chandef; const u8 *mesh_id; u8 mesh_id_len; u8 sync_method; u8 path_sel_proto; u8 path_metric; u8 auth_id; const u8 *ie; u8 ie_len; bool is_authenticated; bool is_secure; bool user_mpm; u8 dtim_period; u16 beacon_interval; int mcast_rate[NUM_NL80211_BANDS]; u32 basic_rates; struct cfg80211_bitrate_mask beacon_rate; bool userspace_handles_dfs; bool control_port_over_nl80211; }; /** * struct ocb_setup - 802.11p OCB mode setup configuration * @chandef: defines the channel to use * * These parameters are fixed when connecting to the network */ struct ocb_setup { struct cfg80211_chan_def chandef; }; /** * struct ieee80211_txq_params - TX queue parameters * @ac: AC identifier * @txop: Maximum burst time in units of 32 usecs, 0 meaning disabled * @cwmin: Minimum contention window [a value of the form 2^n-1 in the range * 1..32767] * @cwmax: Maximum contention window [a value of the form 2^n-1 in the range * 1..32767] * @aifs: Arbitration interframe space [0..255] */ struct ieee80211_txq_params { enum nl80211_ac ac; u16 txop; u16 cwmin; u16 cwmax; u8 aifs; }; /** * DOC: Scanning and BSS list handling * * The scanning process itself is fairly simple, but cfg80211 offers quite * a bit of helper functionality. To start a scan, the scan operation will * be invoked with a scan definition. This scan definition contains the * channels to scan, and the SSIDs to send probe requests for (including the * wildcard, if desired). A passive scan is indicated by having no SSIDs to * probe. Additionally, a scan request may contain extra information elements * that should be added to the probe request. The IEs are guaranteed to be * well-formed, and will not exceed the maximum length the driver advertised * in the wiphy structure. * * When scanning finds a BSS, cfg80211 needs to be notified of that, because * it is responsible for maintaining the BSS list; the driver should not * maintain a list itself. For this notification, various functions exist. * * Since drivers do not maintain a BSS list, there are also a number of * functions to search for a BSS and obtain information about it from the * BSS structure cfg80211 maintains. The BSS list is also made available * to userspace. */ /** * struct cfg80211_ssid - SSID description * @ssid: the SSID * @ssid_len: length of the ssid */ struct cfg80211_ssid { u8 ssid[IEEE80211_MAX_SSID_LEN]; u8 ssid_len; }; /** * struct cfg80211_scan_info - information about completed scan * @scan_start_tsf: scan start time in terms of the TSF of the BSS that the * wireless device that requested the scan is connected to. If this * information is not available, this field is left zero. * @tsf_bssid: the BSSID according to which %scan_start_tsf is set. * @aborted: set to true if the scan was aborted for any reason, * userspace will be notified of that */ struct cfg80211_scan_info { u64 scan_start_tsf; u8 tsf_bssid[ETH_ALEN] __aligned(2); bool aborted; }; /** * struct cfg80211_scan_6ghz_params - relevant for 6 GHz only * * @short_bssid: short ssid to scan for * @bssid: bssid to scan for * @channel_idx: idx of the channel in the channel array in the scan request * which the above info relvant to * @unsolicited_probe: the AP transmits unsolicited probe response every 20 TU * @short_ssid_valid: short_ssid is valid and can be used * @psc_no_listen: when set, and the channel is a PSC channel, no need to wait * 20 TUs before starting to send probe requests. */ struct cfg80211_scan_6ghz_params { u32 short_ssid; u32 channel_idx; u8 bssid[ETH_ALEN]; bool unsolicited_probe; bool short_ssid_valid; bool psc_no_listen; }; /** * struct cfg80211_scan_request - scan request description * * @ssids: SSIDs to scan for (active scan only) * @n_ssids: number of SSIDs * @channels: channels to scan on. * @n_channels: total number of channels to scan * @scan_width: channel width for scanning * @ie: optional information element(s) to add into Probe Request or %NULL * @ie_len: length of ie in octets * @duration: how long to listen on each channel, in TUs. If * %duration_mandatory is not set, this is the maximum dwell time and * the actual dwell time may be shorter. * @duration_mandatory: if set, the scan duration must be as specified by the * %duration field. * @flags: bit field of flags controlling operation * @rates: bitmap of rates to advertise for each band * @wiphy: the wiphy this was for * @scan_start: time (in jiffies) when the scan started * @wdev: the wireless device to scan for * @info: (internal) information about completed scan * @notified: (internal) scan request was notified as done or aborted * @no_cck: used to send probe requests at non CCK rate in 2GHz band * @mac_addr: MAC address used with randomisation * @mac_addr_mask: MAC address mask used with randomisation, bits that * are 0 in the mask should be randomised, bits that are 1 should * be taken from the @mac_addr * @scan_6ghz: relevant for split scan request only, * true if this is the second scan request * @n_6ghz_params: number of 6 GHz params * @scan_6ghz_params: 6 GHz params * @bssid: BSSID to scan for (most commonly, the wildcard BSSID) */ struct cfg80211_scan_request { struct cfg80211_ssid *ssids; int n_ssids; u32 n_channels; enum nl80211_bss_scan_width scan_width; const u8 *ie; size_t ie_len; u16 duration; bool duration_mandatory; u32 flags; u32 rates[NUM_NL80211_BANDS]; struct wireless_dev *wdev; u8 mac_addr[ETH_ALEN] __aligned(2); u8 mac_addr_mask[ETH_ALEN] __aligned(2); u8 bssid[ETH_ALEN] __aligned(2); /* internal */ struct wiphy *wiphy; unsigned long scan_start; struct cfg80211_scan_info info; bool notified; bool no_cck; bool scan_6ghz; u32 n_6ghz_params; struct cfg80211_scan_6ghz_params *scan_6ghz_params; /* keep last */ struct ieee80211_channel *channels[]; }; static inline void get_random_mask_addr(u8 *buf, const u8 *addr, const u8 *mask) { int i; get_random_bytes(buf, ETH_ALEN); for (i = 0; i < ETH_ALEN; i++) { buf[i] &= ~mask[i]; buf[i] |= addr[i] & mask[i]; } } /** * struct cfg80211_match_set - sets of attributes to match * * @ssid: SSID to be matched; may be zero-length in case of BSSID match * or no match (RSSI only) * @bssid: BSSID to be matched; may be all-zero BSSID in case of SSID match * or no match (RSSI only) * @rssi_thold: don't report scan results below this threshold (in s32 dBm) * @per_band_rssi_thold: Minimum rssi threshold for each band to be applied * for filtering out scan results received. Drivers advertize this support * of band specific rssi based filtering through the feature capability * %NL80211_EXT_FEATURE_SCHED_SCAN_BAND_SPECIFIC_RSSI_THOLD. These band * specific rssi thresholds take precedence over rssi_thold, if specified. * If not specified for any band, it will be assigned with rssi_thold of * corresponding matchset. */ struct cfg80211_match_set { struct cfg80211_ssid ssid; u8 bssid[ETH_ALEN]; s32 rssi_thold; s32 per_band_rssi_thold[NUM_NL80211_BANDS]; }; /** * struct cfg80211_sched_scan_plan - scan plan for scheduled scan * * @interval: interval between scheduled scan iterations. In seconds. * @iterations: number of scan iterations in this scan plan. Zero means * infinite loop. * The last scan plan will always have this parameter set to zero, * all other scan plans will have a finite number of iterations. */ struct cfg80211_sched_scan_plan { u32 interval; u32 iterations; }; /** * struct cfg80211_bss_select_adjust - BSS selection with RSSI adjustment. * * @band: band of BSS which should match for RSSI level adjustment. * @delta: value of RSSI level adjustment. */ struct cfg80211_bss_select_adjust { enum nl80211_band band; s8 delta; }; /** * struct cfg80211_sched_scan_request - scheduled scan request description * * @reqid: identifies this request. * @ssids: SSIDs to scan for (passed in the probe_reqs in active scans) * @n_ssids: number of SSIDs * @n_channels: total number of channels to scan * @scan_width: channel width for scanning * @ie: optional information element(s) to add into Probe Request or %NULL * @ie_len: length of ie in octets * @flags: bit field of flags controlling operation * @match_sets: sets of parameters to be matched for a scan result * entry to be considered valid and to be passed to the host * (others are filtered out). * If ommited, all results are passed. * @n_match_sets: number of match sets * @report_results: indicates that results were reported for this request * @wiphy: the wiphy this was for * @dev: the interface * @scan_start: start time of the scheduled scan * @channels: channels to scan * @min_rssi_thold: for drivers only supporting a single threshold, this * contains the minimum over all matchsets * @mac_addr: MAC address used with randomisation * @mac_addr_mask: MAC address mask used with randomisation, bits that * are 0 in the mask should be randomised, bits that are 1 should * be taken from the @mac_addr * @scan_plans: scan plans to be executed in this scheduled scan. Lowest * index must be executed first. * @n_scan_plans: number of scan plans, at least 1. * @rcu_head: RCU callback used to free the struct * @owner_nlportid: netlink portid of owner (if this should is a request * owned by a particular socket) * @nl_owner_dead: netlink owner socket was closed - this request be freed * @list: for keeping list of requests. * @delay: delay in seconds to use before starting the first scan * cycle. The driver may ignore this parameter and start * immediately (or at any other time), if this feature is not * supported. * @relative_rssi_set: Indicates whether @relative_rssi is set or not. * @relative_rssi: Relative RSSI threshold in dB to restrict scan result * reporting in connected state to cases where a matching BSS is determined * to have better or slightly worse RSSI than the current connected BSS. * The relative RSSI threshold values are ignored in disconnected state. * @rssi_adjust: delta dB of RSSI preference to be given to the BSSs that belong * to the specified band while deciding whether a better BSS is reported * using @relative_rssi. If delta is a negative number, the BSSs that * belong to the specified band will be penalized by delta dB in relative * comparisions. */ struct cfg80211_sched_scan_request { u64 reqid; struct cfg80211_ssid *ssids; int n_ssids; u32 n_channels; enum nl80211_bss_scan_width scan_width; const u8 *ie; size_t ie_len; u32 flags; struct cfg80211_match_set *match_sets; int n_match_sets; s32 min_rssi_thold; u32 delay; struct cfg80211_sched_scan_plan *scan_plans; int n_scan_plans; u8 mac_addr[ETH_ALEN] __aligned(2); u8 mac_addr_mask[ETH_ALEN] __aligned(2); bool relative_rssi_set; s8 relative_rssi; struct cfg80211_bss_select_adjust rssi_adjust; /* internal */ struct wiphy *wiphy; struct net_device *dev; unsigned long scan_start; bool report_results; struct rcu_head rcu_head; u32 owner_nlportid; bool nl_owner_dead; struct list_head list; /* keep last */ struct ieee80211_channel *channels[]; }; /** * enum cfg80211_signal_type - signal type * * @CFG80211_SIGNAL_TYPE_NONE: no signal strength information available * @CFG80211_SIGNAL_TYPE_MBM: signal strength in mBm (100*dBm) * @CFG80211_SIGNAL_TYPE_UNSPEC: signal strength, increasing from 0 through 100 */ enum cfg80211_signal_type { CFG80211_SIGNAL_TYPE_NONE, CFG80211_SIGNAL_TYPE_MBM, CFG80211_SIGNAL_TYPE_UNSPEC, }; /** * struct cfg80211_inform_bss - BSS inform data * @chan: channel the frame was received on * @scan_width: scan width that was used * @signal: signal strength value, according to the wiphy's * signal type * @boottime_ns: timestamp (CLOCK_BOOTTIME) when the information was * received; should match the time when the frame was actually * received by the device (not just by the host, in case it was * buffered on the device) and be accurate to about 10ms. * If the frame isn't buffered, just passing the return value of * ktime_get_boottime_ns() is likely appropriate. * @parent_tsf: the time at the start of reception of the first octet of the * timestamp field of the frame. The time is the TSF of the BSS specified * by %parent_bssid. * @parent_bssid: the BSS according to which %parent_tsf is set. This is set to * the BSS that requested the scan in which the beacon/probe was received. * @chains: bitmask for filled values in @chain_signal. * @chain_signal: per-chain signal strength of last received BSS in dBm. */ struct cfg80211_inform_bss { struct ieee80211_channel *chan; enum nl80211_bss_scan_width scan_width; s32 signal; u64 boottime_ns; u64 parent_tsf; u8 parent_bssid[ETH_ALEN] __aligned(2); u8 chains; s8 chain_signal[IEEE80211_MAX_CHAINS]; }; /** * struct cfg80211_bss_ies - BSS entry IE data * @tsf: TSF contained in the frame that carried these IEs * @rcu_head: internal use, for freeing * @len: length of the IEs * @from_beacon: these IEs are known to come from a beacon * @data: IE data */ struct cfg80211_bss_ies { u64 tsf; struct rcu_head rcu_head; int len; bool from_beacon; u8 data[]; }; /** * struct cfg80211_bss - BSS description * * This structure describes a BSS (which may also be a mesh network) * for use in scan results and similar. * * @channel: channel this BSS is on * @scan_width: width of the control channel * @bssid: BSSID of the BSS * @beacon_interval: the beacon interval as from the frame * @capability: the capability field in host byte order * @ies: the information elements (Note that there is no guarantee that these * are well-formed!); this is a pointer to either the beacon_ies or * proberesp_ies depending on whether Probe Response frame has been * received. It is always non-%NULL. * @beacon_ies: the information elements from the last Beacon frame * (implementation note: if @hidden_beacon_bss is set this struct doesn't * own the beacon_ies, but they're just pointers to the ones from the * @hidden_beacon_bss struct) * @proberesp_ies: the information elements from the last Probe Response frame * @hidden_beacon_bss: in case this BSS struct represents a probe response from * a BSS that hides the SSID in its beacon, this points to the BSS struct * that holds the beacon data. @beacon_ies is still valid, of course, and * points to the same data as hidden_beacon_bss->beacon_ies in that case. * @transmitted_bss: pointer to the transmitted BSS, if this is a * non-transmitted one (multi-BSSID support) * @nontrans_list: list of non-transmitted BSS, if this is a transmitted one * (multi-BSSID support) * @signal: signal strength value (type depends on the wiphy's signal_type) * @chains: bitmask for filled values in @chain_signal. * @chain_signal: per-chain signal strength of last received BSS in dBm. * @bssid_index: index in the multiple BSS set * @max_bssid_indicator: max number of members in the BSS set * @priv: private area for driver use, has at least wiphy->bss_priv_size bytes */ struct cfg80211_bss { struct ieee80211_channel *channel; enum nl80211_bss_scan_width scan_width; const struct cfg80211_bss_ies __rcu *ies; const struct cfg80211_bss_ies __rcu *beacon_ies; const struct cfg80211_bss_ies __rcu *proberesp_ies; struct cfg80211_bss *hidden_beacon_bss; struct cfg80211_bss *transmitted_bss; struct list_head nontrans_list; s32 signal; u16 beacon_interval; u16 capability; u8 bssid[ETH_ALEN]; u8 chains; s8 chain_signal[IEEE80211_MAX_CHAINS]; u8 bssid_index; u8 max_bssid_indicator; u8 priv[] __aligned(sizeof(void *)); }; /** * ieee80211_bss_get_elem - find element with given ID * @bss: the bss to search * @id: the element ID * * Note that the return value is an RCU-protected pointer, so * rcu_read_lock() must be held when calling this function. * Return: %NULL if not found. */ const struct element *ieee80211_bss_get_elem(struct cfg80211_bss *bss, u8 id); /** * ieee80211_bss_get_ie - find IE with given ID * @bss: the bss to search * @id: the element ID * * Note that the return value is an RCU-protected pointer, so * rcu_read_lock() must be held when calling this function. * Return: %NULL if not found. */ static inline const u8 *ieee80211_bss_get_ie(struct cfg80211_bss *bss, u8 id) { return (void *)ieee80211_bss_get_elem(bss, id); } /** * struct cfg80211_auth_request - Authentication request data * * This structure provides information needed to complete IEEE 802.11 * authentication. * * @bss: The BSS to authenticate with, the callee must obtain a reference * to it if it needs to keep it. * @auth_type: Authentication type (algorithm) * @ie: Extra IEs to add to Authentication frame or %NULL * @ie_len: Length of ie buffer in octets * @key_len: length of WEP key for shared key authentication * @key_idx: index of WEP key for shared key authentication * @key: WEP key for shared key authentication * @auth_data: Fields and elements in Authentication frames. This contains * the authentication frame body (non-IE and IE data), excluding the * Authentication algorithm number, i.e., starting at the Authentication * transaction sequence number field. * @auth_data_len: Length of auth_data buffer in octets */ struct cfg80211_auth_request { struct cfg80211_bss *bss; const u8 *ie; size_t ie_len; enum nl80211_auth_type auth_type; const u8 *key; u8 key_len, key_idx; const u8 *auth_data; size_t auth_data_len; }; /** * enum cfg80211_assoc_req_flags - Over-ride default behaviour in association. * * @ASSOC_REQ_DISABLE_HT: Disable HT (802.11n) * @ASSOC_REQ_DISABLE_VHT: Disable VHT * @ASSOC_REQ_USE_RRM: Declare RRM capability in this association * @CONNECT_REQ_EXTERNAL_AUTH_SUPPORT: User space indicates external * authentication capability. Drivers can offload authentication to * userspace if this flag is set. Only applicable for cfg80211_connect() * request (connect callback). */ enum cfg80211_assoc_req_flags { ASSOC_REQ_DISABLE_HT = BIT(0), ASSOC_REQ_DISABLE_VHT = BIT(1), ASSOC_REQ_USE_RRM = BIT(2), CONNECT_REQ_EXTERNAL_AUTH_SUPPORT = BIT(3), }; /** * struct cfg80211_assoc_request - (Re)Association request data * * This structure provides information needed to complete IEEE 802.11 * (re)association. * @bss: The BSS to associate with. If the call is successful the driver is * given a reference that it must give back to cfg80211_send_rx_assoc() * or to cfg80211_assoc_timeout(). To ensure proper refcounting, new * association requests while already associating must be rejected. * @ie: Extra IEs to add to (Re)Association Request frame or %NULL * @ie_len: Length of ie buffer in octets * @use_mfp: Use management frame protection (IEEE 802.11w) in this association * @crypto: crypto settings * @prev_bssid: previous BSSID, if not %NULL use reassociate frame. This is used * to indicate a request to reassociate within the ESS instead of a request * do the initial association with the ESS. When included, this is set to * the BSSID of the current association, i.e., to the value that is * included in the Current AP address field of the Reassociation Request * frame. * @flags: See &enum cfg80211_assoc_req_flags * @ht_capa: HT Capabilities over-rides. Values set in ht_capa_mask * will be used in ht_capa. Un-supported values will be ignored. * @ht_capa_mask: The bits of ht_capa which are to be used. * @vht_capa: VHT capability override * @vht_capa_mask: VHT capability mask indicating which fields to use * @fils_kek: FILS KEK for protecting (Re)Association Request/Response frame or * %NULL if FILS is not used. * @fils_kek_len: Length of fils_kek in octets * @fils_nonces: FILS nonces (part of AAD) for protecting (Re)Association * Request/Response frame or %NULL if FILS is not used. This field starts * with 16 octets of STA Nonce followed by 16 octets of AP Nonce. * @s1g_capa: S1G capability override * @s1g_capa_mask: S1G capability override mask */ struct cfg80211_assoc_request { struct cfg80211_bss *bss; const u8 *ie, *prev_bssid; size_t ie_len; struct cfg80211_crypto_settings crypto; bool use_mfp; u32 flags; struct ieee80211_ht_cap ht_capa; struct ieee80211_ht_cap ht_capa_mask; struct ieee80211_vht_cap vht_capa, vht_capa_mask; const u8 *fils_kek; size_t fils_kek_len; const u8 *fils_nonces; struct ieee80211_s1g_cap s1g_capa, s1g_capa_mask; }; /** * struct cfg80211_deauth_request - Deauthentication request data * * This structure provides information needed to complete IEEE 802.11 * deauthentication. * * @bssid: the BSSID of the BSS to deauthenticate from * @ie: Extra IEs to add to Deauthentication frame or %NULL * @ie_len: Length of ie buffer in octets * @reason_code: The reason code for the deauthentication * @local_state_change: if set, change local state only and * do not set a deauth frame */ struct cfg80211_deauth_request { const u8 *bssid; const u8 *ie; size_t ie_len; u16 reason_code; bool local_state_change; }; /** * struct cfg80211_disassoc_request - Disassociation request data * * This structure provides information needed to complete IEEE 802.11 * disassociation. * * @bss: the BSS to disassociate from * @ie: Extra IEs to add to Disassociation frame or %NULL * @ie_len: Length of ie buffer in octets * @reason_code: The reason code for the disassociation * @local_state_change: This is a request for a local state only, i.e., no * Disassociation frame is to be transmitted. */ struct cfg80211_disassoc_request { struct cfg80211_bss *bss; const u8 *ie; size_t ie_len; u16 reason_code; bool local_state_change; }; /** * struct cfg80211_ibss_params - IBSS parameters * * This structure defines the IBSS parameters for the join_ibss() * method. * * @ssid: The SSID, will always be non-null. * @ssid_len: The length of the SSID, will always be non-zero. * @bssid: Fixed BSSID requested, maybe be %NULL, if set do not * search for IBSSs with a different BSSID. * @chandef: defines the channel to use if no other IBSS to join can be found * @channel_fixed: The channel should be fixed -- do not search for * IBSSs to join on other channels. * @ie: information element(s) to include in the beacon * @ie_len: length of that * @beacon_interval: beacon interval to use * @privacy: this is a protected network, keys will be configured * after joining * @control_port: whether user space controls IEEE 802.1X port, i.e., * sets/clears %NL80211_STA_FLAG_AUTHORIZED. If true, the driver is * required to assume that the port is unauthorized until authorized by * user space. Otherwise, port is marked authorized by default. * @control_port_over_nl80211: TRUE if userspace expects to exchange control * port frames over NL80211 instead of the network interface. * @userspace_handles_dfs: whether user space controls DFS operation, i.e. * changes the channel when a radar is detected. This is required * to operate on DFS channels. * @basic_rates: bitmap of basic rates to use when creating the IBSS * @mcast_rate: per-band multicast rate index + 1 (0: disabled) * @ht_capa: HT Capabilities over-rides. Values set in ht_capa_mask * will be used in ht_capa. Un-supported values will be ignored. * @ht_capa_mask: The bits of ht_capa which are to be used. * @wep_keys: static WEP keys, if not NULL points to an array of * CFG80211_MAX_WEP_KEYS WEP keys * @wep_tx_key: key index (0..3) of the default TX static WEP key */ struct cfg80211_ibss_params { const u8 *ssid; const u8 *bssid; struct cfg80211_chan_def chandef; const u8 *ie; u8 ssid_len, ie_len; u16 beacon_interval; u32 basic_rates; bool channel_fixed; bool privacy; bool control_port; bool control_port_over_nl80211; bool userspace_handles_dfs; int mcast_rate[NUM_NL80211_BANDS]; struct ieee80211_ht_cap ht_capa; struct ieee80211_ht_cap ht_capa_mask; struct key_params *wep_keys; int wep_tx_key; }; /** * struct cfg80211_bss_selection - connection parameters for BSS selection. * * @behaviour: requested BSS selection behaviour. * @param: parameters for requestion behaviour. * @band_pref: preferred band for %NL80211_BSS_SELECT_ATTR_BAND_PREF. * @adjust: parameters for %NL80211_BSS_SELECT_ATTR_RSSI_ADJUST. */ struct cfg80211_bss_selection { enum nl80211_bss_select_attr behaviour; union { enum nl80211_band band_pref; struct cfg80211_bss_select_adjust adjust; } param; }; /** * struct cfg80211_connect_params - Connection parameters * * This structure provides information needed to complete IEEE 802.11 * authentication and association. * * @channel: The channel to use or %NULL if not specified (auto-select based * on scan results) * @channel_hint: The channel of the recommended BSS for initial connection or * %NULL if not specified * @bssid: The AP BSSID or %NULL if not specified (auto-select based on scan * results) * @bssid_hint: The recommended AP BSSID for initial connection to the BSS or * %NULL if not specified. Unlike the @bssid parameter, the driver is * allowed to ignore this @bssid_hint if it has knowledge of a better BSS * to use. * @ssid: SSID * @ssid_len: Length of ssid in octets * @auth_type: Authentication type (algorithm) * @ie: IEs for association request * @ie_len: Length of assoc_ie in octets * @privacy: indicates whether privacy-enabled APs should be used * @mfp: indicate whether management frame protection is used * @crypto: crypto settings * @key_len: length of WEP key for shared key authentication * @key_idx: index of WEP key for shared key authentication * @key: WEP key for shared key authentication * @flags: See &enum cfg80211_assoc_req_flags * @bg_scan_period: Background scan period in seconds * or -1 to indicate that default value is to be used. * @ht_capa: HT Capabilities over-rides. Values set in ht_capa_mask * will be used in ht_capa. Un-supported values will be ignored. * @ht_capa_mask: The bits of ht_capa which are to be used. * @vht_capa: VHT Capability overrides * @vht_capa_mask: The bits of vht_capa which are to be used. * @pbss: if set, connect to a PCP instead of AP. Valid for DMG * networks. * @bss_select: criteria to be used for BSS selection. * @prev_bssid: previous BSSID, if not %NULL use reassociate frame. This is used * to indicate a request to reassociate within the ESS instead of a request * do the initial association with the ESS. When included, this is set to * the BSSID of the current association, i.e., to the value that is * included in the Current AP address field of the Reassociation Request * frame. * @fils_erp_username: EAP re-authentication protocol (ERP) username part of the * NAI or %NULL if not specified. This is used to construct FILS wrapped * data IE. * @fils_erp_username_len: Length of @fils_erp_username in octets. * @fils_erp_realm: EAP re-authentication protocol (ERP) realm part of NAI or * %NULL if not specified. This specifies the domain name of ER server and * is used to construct FILS wrapped data IE. * @fils_erp_realm_len: Length of @fils_erp_realm in octets. * @fils_erp_next_seq_num: The next sequence number to use in the FILS ERP * messages. This is also used to construct FILS wrapped data IE. * @fils_erp_rrk: ERP re-authentication Root Key (rRK) used to derive additional * keys in FILS or %NULL if not specified. * @fils_erp_rrk_len: Length of @fils_erp_rrk in octets. * @want_1x: indicates user-space supports and wants to use 802.1X driver * offload of 4-way handshake. * @edmg: define the EDMG channels. * This may specify multiple channels and bonding options for the driver * to choose from, based on BSS configuration. */ struct cfg80211_connect_params { struct ieee80211_channel *channel; struct ieee80211_channel *channel_hint; const u8 *bssid; const u8 *bssid_hint; const u8 *ssid; size_t ssid_len; enum nl80211_auth_type auth_type; const u8 *ie; size_t ie_len; bool privacy; enum nl80211_mfp mfp; struct cfg80211_crypto_settings crypto; const u8 *key; u8 key_len, key_idx; u32 flags; int bg_scan_period; struct ieee80211_ht_cap ht_capa; struct ieee80211_ht_cap ht_capa_mask; struct ieee80211_vht_cap vht_capa; struct ieee80211_vht_cap vht_capa_mask; bool pbss; struct cfg80211_bss_selection bss_select; const u8 *prev_bssid; const u8 *fils_erp_username; size_t fils_erp_username_len; const u8 *fils_erp_realm; size_t fils_erp_realm_len; u16 fils_erp_next_seq_num; const u8 *fils_erp_rrk; size_t fils_erp_rrk_len; bool want_1x; struct ieee80211_edmg edmg; }; /** * enum cfg80211_connect_params_changed - Connection parameters being updated * * This enum provides information of all connect parameters that * have to be updated as part of update_connect_params() call. * * @UPDATE_ASSOC_IES: Indicates whether association request IEs are updated * @UPDATE_FILS_ERP_INFO: Indicates that FILS connection parameters (realm, * username, erp sequence number and rrk) are updated * @UPDATE_AUTH_TYPE: Indicates that authentication type is updated */ enum cfg80211_connect_params_changed { UPDATE_ASSOC_IES = BIT(0), UPDATE_FILS_ERP_INFO = BIT(1), UPDATE_AUTH_TYPE = BIT(2), }; /** * enum wiphy_params_flags - set_wiphy_params bitfield values * @WIPHY_PARAM_RETRY_SHORT: wiphy->retry_short has changed * @WIPHY_PARAM_RETRY_LONG: wiphy->retry_long has changed * @WIPHY_PARAM_FRAG_THRESHOLD: wiphy->frag_threshold has changed * @WIPHY_PARAM_RTS_THRESHOLD: wiphy->rts_threshold has changed * @WIPHY_PARAM_COVERAGE_CLASS: coverage class changed * @WIPHY_PARAM_DYN_ACK: dynack has been enabled * @WIPHY_PARAM_TXQ_LIMIT: TXQ packet limit has been changed * @WIPHY_PARAM_TXQ_MEMORY_LIMIT: TXQ memory limit has been changed * @WIPHY_PARAM_TXQ_QUANTUM: TXQ scheduler quantum */ enum wiphy_params_flags { WIPHY_PARAM_RETRY_SHORT = 1 << 0, WIPHY_PARAM_RETRY_LONG = 1 << 1, WIPHY_PARAM_FRAG_THRESHOLD = 1 << 2, WIPHY_PARAM_RTS_THRESHOLD = 1 << 3, WIPHY_PARAM_COVERAGE_CLASS = 1 << 4, WIPHY_PARAM_DYN_ACK = 1 << 5, WIPHY_PARAM_TXQ_LIMIT = 1 << 6, WIPHY_PARAM_TXQ_MEMORY_LIMIT = 1 << 7, WIPHY_PARAM_TXQ_QUANTUM = 1 << 8, }; #define IEEE80211_DEFAULT_AIRTIME_WEIGHT 256 /* The per TXQ device queue limit in airtime */ #define IEEE80211_DEFAULT_AQL_TXQ_LIMIT_L 5000 #define IEEE80211_DEFAULT_AQL_TXQ_LIMIT_H 12000 /* The per interface airtime threshold to switch to lower queue limit */ #define IEEE80211_AQL_THRESHOLD 24000 /** * struct cfg80211_pmksa - PMK Security Association * * This structure is passed to the set/del_pmksa() method for PMKSA * caching. * * @bssid: The AP's BSSID (may be %NULL). * @pmkid: The identifier to refer a PMKSA. * @pmk: The PMK for the PMKSA identified by @pmkid. This is used for key * derivation by a FILS STA. Otherwise, %NULL. * @pmk_len: Length of the @pmk. The length of @pmk can differ depending on * the hash algorithm used to generate this. * @ssid: SSID to specify the ESS within which a PMKSA is valid when using FILS * cache identifier (may be %NULL). * @ssid_len: Length of the @ssid in octets. * @cache_id: 2-octet cache identifier advertized by a FILS AP identifying the * scope of PMKSA. This is valid only if @ssid_len is non-zero (may be * %NULL). * @pmk_lifetime: Maximum lifetime for PMKSA in seconds * (dot11RSNAConfigPMKLifetime) or 0 if not specified. * The configured PMKSA must not be used for PMKSA caching after * expiration and any keys derived from this PMK become invalid on * expiration, i.e., the current association must be dropped if the PMK * used for it expires. * @pmk_reauth_threshold: Threshold time for reauthentication (percentage of * PMK lifetime, dot11RSNAConfigPMKReauthThreshold) or 0 if not specified. * Drivers are expected to trigger a full authentication instead of using * this PMKSA for caching when reassociating to a new BSS after this * threshold to generate a new PMK before the current one expires. */ struct cfg80211_pmksa { const u8 *bssid; const u8 *pmkid; const u8 *pmk; size_t pmk_len; const u8 *ssid; size_t ssid_len; const u8 *cache_id; u32 pmk_lifetime; u8 pmk_reauth_threshold; }; /** * struct cfg80211_pkt_pattern - packet pattern * @mask: bitmask where to match pattern and where to ignore bytes, * one bit per byte, in same format as nl80211 * @pattern: bytes to match where bitmask is 1 * @pattern_len: length of pattern (in bytes) * @pkt_offset: packet offset (in bytes) * * Internal note: @mask and @pattern are allocated in one chunk of * memory, free @mask only! */ struct cfg80211_pkt_pattern { const u8 *mask, *pattern; int pattern_len; int pkt_offset; }; /** * struct cfg80211_wowlan_tcp - TCP connection parameters * * @sock: (internal) socket for source port allocation * @src: source IP address * @dst: destination IP address * @dst_mac: destination MAC address * @src_port: source port * @dst_port: destination port * @payload_len: data payload length * @payload: data payload buffer * @payload_seq: payload sequence stamping configuration * @data_interval: interval at which to send data packets * @wake_len: wakeup payload match length * @wake_data: wakeup payload match data * @wake_mask: wakeup payload match mask * @tokens_size: length of the tokens buffer * @payload_tok: payload token usage configuration */ struct cfg80211_wowlan_tcp { struct socket *sock; __be32 src, dst; u16 src_port, dst_port; u8 dst_mac[ETH_ALEN]; int payload_len; const u8 *payload; struct nl80211_wowlan_tcp_data_seq payload_seq; u32 data_interval; u32 wake_len; const u8 *wake_data, *wake_mask; u32 tokens_size; /* must be last, variable member */ struct nl80211_wowlan_tcp_data_token payload_tok; }; /** * struct cfg80211_wowlan - Wake on Wireless-LAN support info * * This structure defines the enabled WoWLAN triggers for the device. * @any: wake up on any activity -- special trigger if device continues * operating as normal during suspend * @disconnect: wake up if getting disconnected * @magic_pkt: wake up on receiving magic packet * @patterns: wake up on receiving packet matching a pattern * @n_patterns: number of patterns * @gtk_rekey_failure: wake up on GTK rekey failure * @eap_identity_req: wake up on EAP identity request packet * @four_way_handshake: wake up on 4-way handshake * @rfkill_release: wake up when rfkill is released * @tcp: TCP connection establishment/wakeup parameters, see nl80211.h. * NULL if not configured. * @nd_config: configuration for the scan to be used for net detect wake. */ struct cfg80211_wowlan { bool any, disconnect, magic_pkt, gtk_rekey_failure, eap_identity_req, four_way_handshake, rfkill_release; struct cfg80211_pkt_pattern *patterns; struct cfg80211_wowlan_tcp *tcp; int n_patterns; struct cfg80211_sched_scan_request *nd_config; }; /** * struct cfg80211_coalesce_rules - Coalesce rule parameters * * This structure defines coalesce rule for the device. * @delay: maximum coalescing delay in msecs. * @condition: condition for packet coalescence. * see &enum nl80211_coalesce_condition. * @patterns: array of packet patterns * @n_patterns: number of patterns */ struct cfg80211_coalesce_rules { int delay; enum nl80211_coalesce_condition condition; struct cfg80211_pkt_pattern *patterns; int n_patterns; }; /** * struct cfg80211_coalesce - Packet coalescing settings * * This structure defines coalescing settings. * @rules: array of coalesce rules * @n_rules: number of rules */ struct cfg80211_coalesce { struct cfg80211_coalesce_rules *rules; int n_rules; }; /** * struct cfg80211_wowlan_nd_match - information about the match * * @ssid: SSID of the match that triggered the wake up * @n_channels: Number of channels where the match occurred. This * value may be zero if the driver can't report the channels. * @channels: center frequencies of the channels where a match * occurred (in MHz) */ struct cfg80211_wowlan_nd_match { struct cfg80211_ssid ssid; int n_channels; u32 channels[]; }; /** * struct cfg80211_wowlan_nd_info - net detect wake up information * * @n_matches: Number of match information instances provided in * @matches. This value may be zero if the driver can't provide * match information. * @matches: Array of pointers to matches containing information about * the matches that triggered the wake up. */ struct cfg80211_wowlan_nd_info { int n_matches; struct cfg80211_wowlan_nd_match *matches[]; }; /** * struct cfg80211_wowlan_wakeup - wakeup report * @disconnect: woke up by getting disconnected * @magic_pkt: woke up by receiving magic packet * @gtk_rekey_failure: woke up by GTK rekey failure * @eap_identity_req: woke up by EAP identity request packet * @four_way_handshake: woke up by 4-way handshake * @rfkill_release: woke up by rfkill being released * @pattern_idx: pattern that caused wakeup, -1 if not due to pattern * @packet_present_len: copied wakeup packet data * @packet_len: original wakeup packet length * @packet: The packet causing the wakeup, if any. * @packet_80211: For pattern match, magic packet and other data * frame triggers an 802.3 frame should be reported, for * disconnect due to deauth 802.11 frame. This indicates which * it is. * @tcp_match: TCP wakeup packet received * @tcp_connlost: TCP connection lost or failed to establish * @tcp_nomoretokens: TCP data ran out of tokens * @net_detect: if not %NULL, woke up because of net detect */ struct cfg80211_wowlan_wakeup { bool disconnect, magic_pkt, gtk_rekey_failure, eap_identity_req, four_way_handshake, rfkill_release, packet_80211, tcp_match, tcp_connlost, tcp_nomoretokens; s32 pattern_idx; u32 packet_present_len, packet_len; const void *packet; struct cfg80211_wowlan_nd_info *net_detect; }; /** * struct cfg80211_gtk_rekey_data - rekey data * @kek: key encryption key (@kek_len bytes) * @kck: key confirmation key (@kck_len bytes) * @replay_ctr: replay counter (NL80211_REPLAY_CTR_LEN bytes) * @kek_len: length of kek * @kck_len length of kck * @akm: akm (oui, id) */ struct cfg80211_gtk_rekey_data { const u8 *kek, *kck, *replay_ctr; u32 akm; u8 kek_len, kck_len; }; /** * struct cfg80211_update_ft_ies_params - FT IE Information * * This structure provides information needed to update the fast transition IE * * @md: The Mobility Domain ID, 2 Octet value * @ie: Fast Transition IEs * @ie_len: Length of ft_ie in octets */ struct cfg80211_update_ft_ies_params { u16 md; const u8 *ie; size_t ie_len; }; /** * struct cfg80211_mgmt_tx_params - mgmt tx parameters * * This structure provides information needed to transmit a mgmt frame * * @chan: channel to use * @offchan: indicates wether off channel operation is required * @wait: duration for ROC * @buf: buffer to transmit * @len: buffer length * @no_cck: don't use cck rates for this frame * @dont_wait_for_ack: tells the low level not to wait for an ack * @n_csa_offsets: length of csa_offsets array * @csa_offsets: array of all the csa offsets in the frame */ struct cfg80211_mgmt_tx_params { struct ieee80211_channel *chan; bool offchan; unsigned int wait; const u8 *buf; size_t len; bool no_cck; bool dont_wait_for_ack; int n_csa_offsets; const u16 *csa_offsets; }; /** * struct cfg80211_dscp_exception - DSCP exception * * @dscp: DSCP value that does not adhere to the user priority range definition * @up: user priority value to which the corresponding DSCP value belongs */ struct cfg80211_dscp_exception { u8 dscp; u8 up; }; /** * struct cfg80211_dscp_range - DSCP range definition for user priority * * @low: lowest DSCP value of this user priority range, inclusive * @high: highest DSCP value of this user priority range, inclusive */ struct cfg80211_dscp_range { u8 low; u8 high; }; /* QoS Map Set element length defined in IEEE Std 802.11-2012, 8.4.2.97 */ #define IEEE80211_QOS_MAP_MAX_EX 21 #define IEEE80211_QOS_MAP_LEN_MIN 16 #define IEEE80211_QOS_MAP_LEN_MAX \ (IEEE80211_QOS_MAP_LEN_MIN + 2 * IEEE80211_QOS_MAP_MAX_EX) /** * struct cfg80211_qos_map - QoS Map Information * * This struct defines the Interworking QoS map setting for DSCP values * * @num_des: number of DSCP exceptions (0..21) * @dscp_exception: optionally up to maximum of 21 DSCP exceptions from * the user priority DSCP range definition * @up: DSCP range definition for a particular user priority */ struct cfg80211_qos_map { u8 num_des; struct cfg80211_dscp_exception dscp_exception[IEEE80211_QOS_MAP_MAX_EX]; struct cfg80211_dscp_range up[8]; }; /** * struct cfg80211_nan_conf - NAN configuration * * This struct defines NAN configuration parameters * * @master_pref: master preference (1 - 255) * @bands: operating bands, a bitmap of &enum nl80211_band values. * For instance, for NL80211_BAND_2GHZ, bit 0 would be set * (i.e. BIT(NL80211_BAND_2GHZ)). */ struct cfg80211_nan_conf { u8 master_pref; u8 bands; }; /** * enum cfg80211_nan_conf_changes - indicates changed fields in NAN * configuration * * @CFG80211_NAN_CONF_CHANGED_PREF: master preference * @CFG80211_NAN_CONF_CHANGED_BANDS: operating bands */ enum cfg80211_nan_conf_changes { CFG80211_NAN_CONF_CHANGED_PREF = BIT(0), CFG80211_NAN_CONF_CHANGED_BANDS = BIT(1), }; /** * struct cfg80211_nan_func_filter - a NAN function Rx / Tx filter * * @filter: the content of the filter * @len: the length of the filter */ struct cfg80211_nan_func_filter { const u8 *filter; u8 len; }; /** * struct cfg80211_nan_func - a NAN function * * @type: &enum nl80211_nan_function_type * @service_id: the service ID of the function * @publish_type: &nl80211_nan_publish_type * @close_range: if true, the range should be limited. Threshold is * implementation specific. * @publish_bcast: if true, the solicited publish should be broadcasted * @subscribe_active: if true, the subscribe is active * @followup_id: the instance ID for follow up * @followup_reqid: the requestor instance ID for follow up * @followup_dest: MAC address of the recipient of the follow up * @ttl: time to live counter in DW. * @serv_spec_info: Service Specific Info * @serv_spec_info_len: Service Specific Info length * @srf_include: if true, SRF is inclusive * @srf_bf: Bloom Filter * @srf_bf_len: Bloom Filter length * @srf_bf_idx: Bloom Filter index * @srf_macs: SRF MAC addresses * @srf_num_macs: number of MAC addresses in SRF * @rx_filters: rx filters that are matched with corresponding peer's tx_filter * @tx_filters: filters that should be transmitted in the SDF. * @num_rx_filters: length of &rx_filters. * @num_tx_filters: length of &tx_filters. * @instance_id: driver allocated id of the function. * @cookie: unique NAN function identifier. */ struct cfg80211_nan_func { enum nl80211_nan_function_type type; u8 service_id[NL80211_NAN_FUNC_SERVICE_ID_LEN]; u8 publish_type; bool close_range; bool publish_bcast; bool subscribe_active; u8 followup_id; u8 followup_reqid; struct mac_address followup_dest; u32 ttl; const u8 *serv_spec_info; u8 serv_spec_info_len; bool srf_include; const u8 *srf_bf; u8 srf_bf_len; u8 srf_bf_idx; struct mac_address *srf_macs; int srf_num_macs; struct cfg80211_nan_func_filter *rx_filters; struct cfg80211_nan_func_filter *tx_filters; u8 num_tx_filters; u8 num_rx_filters; u8 instance_id; u64 cookie; }; /** * struct cfg80211_pmk_conf - PMK configuration * * @aa: authenticator address * @pmk_len: PMK length in bytes. * @pmk: the PMK material * @pmk_r0_name: PMK-R0 Name. NULL if not applicable (i.e., the PMK * is not PMK-R0). When pmk_r0_name is not NULL, the pmk field * holds PMK-R0. */ struct cfg80211_pmk_conf { const u8 *aa; u8 pmk_len; const u8 *pmk; const u8 *pmk_r0_name; }; /** * struct cfg80211_external_auth_params - Trigger External authentication. * * Commonly used across the external auth request and event interfaces. * * @action: action type / trigger for external authentication. Only significant * for the authentication request event interface (driver to user space). * @bssid: BSSID of the peer with which the authentication has * to happen. Used by both the authentication request event and * authentication response command interface. * @ssid: SSID of the AP. Used by both the authentication request event and * authentication response command interface. * @key_mgmt_suite: AKM suite of the respective authentication. Used by the * authentication request event interface. * @status: status code, %WLAN_STATUS_SUCCESS for successful authentication, * use %WLAN_STATUS_UNSPECIFIED_FAILURE if user space cannot give you * the real status code for failures. Used only for the authentication * response command interface (user space to driver). * @pmkid: The identifier to refer a PMKSA. */ struct cfg80211_external_auth_params { enum nl80211_external_auth_action action; u8 bssid[ETH_ALEN] __aligned(2); struct cfg80211_ssid ssid; unsigned int key_mgmt_suite; u16 status; const u8 *pmkid; }; /** * struct cfg80211_ftm_responder_stats - FTM responder statistics * * @filled: bitflag of flags using the bits of &enum nl80211_ftm_stats to * indicate the relevant values in this struct for them * @success_num: number of FTM sessions in which all frames were successfully * answered * @partial_num: number of FTM sessions in which part of frames were * successfully answered * @failed_num: number of failed FTM sessions * @asap_num: number of ASAP FTM sessions * @non_asap_num: number of non-ASAP FTM sessions * @total_duration_ms: total sessions durations - gives an indication * of how much time the responder was busy * @unknown_triggers_num: number of unknown FTM triggers - triggers from * initiators that didn't finish successfully the negotiation phase with * the responder * @reschedule_requests_num: number of FTM reschedule requests - initiator asks * for a new scheduling although it already has scheduled FTM slot * @out_of_window_triggers_num: total FTM triggers out of scheduled window */ struct cfg80211_ftm_responder_stats { u32 filled; u32 success_num; u32 partial_num; u32 failed_num; u32 asap_num; u32 non_asap_num; u64 total_duration_ms; u32 unknown_triggers_num; u32 reschedule_requests_num; u32 out_of_window_triggers_num; }; /** * struct cfg80211_pmsr_ftm_result - FTM result * @failure_reason: if this measurement failed (PMSR status is * %NL80211_PMSR_STATUS_FAILURE), this gives a more precise * reason than just "failure" * @burst_index: if reporting partial results, this is the index * in [0 .. num_bursts-1] of the burst that's being reported * @num_ftmr_attempts: number of FTM request frames transmitted * @num_ftmr_successes: number of FTM request frames acked * @busy_retry_time: if failure_reason is %NL80211_PMSR_FTM_FAILURE_PEER_BUSY, * fill this to indicate in how many seconds a retry is deemed possible * by the responder * @num_bursts_exp: actual number of bursts exponent negotiated * @burst_duration: actual burst duration negotiated * @ftms_per_burst: actual FTMs per burst negotiated * @lci_len: length of LCI information (if present) * @civicloc_len: length of civic location information (if present) * @lci: LCI data (may be %NULL) * @civicloc: civic location data (may be %NULL) * @rssi_avg: average RSSI over FTM action frames reported * @rssi_spread: spread of the RSSI over FTM action frames reported * @tx_rate: bitrate for transmitted FTM action frame response * @rx_rate: bitrate of received FTM action frame * @rtt_avg: average of RTTs measured (must have either this or @dist_avg) * @rtt_variance: variance of RTTs measured (note that standard deviation is * the square root of the variance) * @rtt_spread: spread of the RTTs measured * @dist_avg: average of distances (mm) measured * (must have either this or @rtt_avg) * @dist_variance: variance of distances measured (see also @rtt_variance) * @dist_spread: spread of distances measured (see also @rtt_spread) * @num_ftmr_attempts_valid: @num_ftmr_attempts is valid * @num_ftmr_successes_valid: @num_ftmr_successes is valid * @rssi_avg_valid: @rssi_avg is valid * @rssi_spread_valid: @rssi_spread is valid * @tx_rate_valid: @tx_rate is valid * @rx_rate_valid: @rx_rate is valid * @rtt_avg_valid: @rtt_avg is valid * @rtt_variance_valid: @rtt_variance is valid * @rtt_spread_valid: @rtt_spread is valid * @dist_avg_valid: @dist_avg is valid * @dist_variance_valid: @dist_variance is valid * @dist_spread_valid: @dist_spread is valid */ struct cfg80211_pmsr_ftm_result { const u8 *lci; const u8 *civicloc; unsigned int lci_len; unsigned int civicloc_len; enum nl80211_peer_measurement_ftm_failure_reasons failure_reason; u32 num_ftmr_attempts, num_ftmr_successes; s16 burst_index; u8 busy_retry_time; u8 num_bursts_exp; u8 burst_duration; u8 ftms_per_burst; s32 rssi_avg; s32 rssi_spread; struct rate_info tx_rate, rx_rate; s64 rtt_avg; s64 rtt_variance; s64 rtt_spread; s64 dist_avg; s64 dist_variance; s64 dist_spread; u16 num_ftmr_attempts_valid:1, num_ftmr_successes_valid:1, rssi_avg_valid:1, rssi_spread_valid:1, tx_rate_valid:1, rx_rate_valid:1, rtt_avg_valid:1, rtt_variance_valid:1, rtt_spread_valid:1, dist_avg_valid:1, dist_variance_valid:1, dist_spread_valid:1; }; /** * struct cfg80211_pmsr_result - peer measurement result * @addr: address of the peer * @host_time: host time (use ktime_get_boottime() adjust to the time when the * measurement was made) * @ap_tsf: AP's TSF at measurement time * @status: status of the measurement * @final: if reporting partial results, mark this as the last one; if not * reporting partial results always set this flag * @ap_tsf_valid: indicates the @ap_tsf value is valid * @type: type of the measurement reported, note that we only support reporting * one type at a time, but you can report multiple results separately and * they're all aggregated for userspace. */ struct cfg80211_pmsr_result { u64 host_time, ap_tsf; enum nl80211_peer_measurement_status status; u8 addr[ETH_ALEN]; u8 final:1, ap_tsf_valid:1; enum nl80211_peer_measurement_type type; union { struct cfg80211_pmsr_ftm_result ftm; }; }; /** * struct cfg80211_pmsr_ftm_request_peer - FTM request data * @requested: indicates FTM is requested * @preamble: frame preamble to use * @burst_period: burst period to use * @asap: indicates to use ASAP mode * @num_bursts_exp: number of bursts exponent * @burst_duration: burst duration * @ftms_per_burst: number of FTMs per burst * @ftmr_retries: number of retries for FTM request * @request_lci: request LCI information * @request_civicloc: request civic location information * @trigger_based: use trigger based ranging for the measurement * If neither @trigger_based nor @non_trigger_based is set, * EDCA based ranging will be used. * @non_trigger_based: use non trigger based ranging for the measurement * If neither @trigger_based nor @non_trigger_based is set, * EDCA based ranging will be used. * * See also nl80211 for the respective attribute documentation. */ struct cfg80211_pmsr_ftm_request_peer { enum nl80211_preamble preamble; u16 burst_period; u8 requested:1, asap:1, request_lci:1, request_civicloc:1, trigger_based:1, non_trigger_based:1; u8 num_bursts_exp; u8 burst_duration; u8 ftms_per_burst; u8 ftmr_retries; }; /** * struct cfg80211_pmsr_request_peer - peer data for a peer measurement request * @addr: MAC address * @chandef: channel to use * @report_ap_tsf: report the associated AP's TSF * @ftm: FTM data, see &struct cfg80211_pmsr_ftm_request_peer */ struct cfg80211_pmsr_request_peer { u8 addr[ETH_ALEN]; struct cfg80211_chan_def chandef; u8 report_ap_tsf:1; struct cfg80211_pmsr_ftm_request_peer ftm; }; /** * struct cfg80211_pmsr_request - peer measurement request * @cookie: cookie, set by cfg80211 * @nl_portid: netlink portid - used by cfg80211 * @drv_data: driver data for this request, if required for aborting, * not otherwise freed or anything by cfg80211 * @mac_addr: MAC address used for (randomised) request * @mac_addr_mask: MAC address mask used for randomisation, bits that * are 0 in the mask should be randomised, bits that are 1 should * be taken from the @mac_addr * @list: used by cfg80211 to hold on to the request * @timeout: timeout (in milliseconds) for the whole operation, if * zero it means there's no timeout * @n_peers: number of peers to do measurements with * @peers: per-peer measurement request data */ struct cfg80211_pmsr_request { u64 cookie; void *drv_data; u32 n_peers; u32 nl_portid; u32 timeout; u8 mac_addr[ETH_ALEN] __aligned(2); u8 mac_addr_mask[ETH_ALEN] __aligned(2); struct list_head list; struct cfg80211_pmsr_request_peer peers[]; }; /** * struct cfg80211_update_owe_info - OWE Information * * This structure provides information needed for the drivers to offload OWE * (Opportunistic Wireless Encryption) processing to the user space. * * Commonly used across update_owe_info request and event interfaces. * * @peer: MAC address of the peer device for which the OWE processing * has to be done. * @status: status code, %WLAN_STATUS_SUCCESS for successful OWE info * processing, use %WLAN_STATUS_UNSPECIFIED_FAILURE if user space * cannot give you the real status code for failures. Used only for * OWE update request command interface (user space to driver). * @ie: IEs obtained from the peer or constructed by the user space. These are * the IEs of the remote peer in the event from the host driver and * the constructed IEs by the user space in the request interface. * @ie_len: Length of IEs in octets. */ struct cfg80211_update_owe_info { u8 peer[ETH_ALEN] __aligned(2); u16 status; const u8 *ie; size_t ie_len; }; /** * struct mgmt_frame_regs - management frame registrations data * @global_stypes: bitmap of management frame subtypes registered * for the entire device * @interface_stypes: bitmap of management frame subtypes registered * for the given interface * @global_mcast_rx: mcast RX is needed globally for these subtypes * @interface_mcast_stypes: mcast RX is needed on this interface * for these subtypes */ struct mgmt_frame_regs { u32 global_stypes, interface_stypes; u32 global_mcast_stypes, interface_mcast_stypes; }; /** * struct cfg80211_ops - backend description for wireless configuration * * This struct is registered by fullmac card drivers and/or wireless stacks * in order to handle configuration requests on their interfaces. * * All callbacks except where otherwise noted should return 0 * on success or a negative error code. * * All operations are currently invoked under rtnl for consistency with the * wireless extensions but this is subject to reevaluation as soon as this * code is used more widely and we have a first user without wext. * * @suspend: wiphy device needs to be suspended. The variable @wow will * be %NULL or contain the enabled Wake-on-Wireless triggers that are * configured for the device. * @resume: wiphy device needs to be resumed * @set_wakeup: Called when WoWLAN is enabled/disabled, use this callback * to call device_set_wakeup_enable() to enable/disable wakeup from * the device. * * @add_virtual_intf: create a new virtual interface with the given name, * must set the struct wireless_dev's iftype. Beware: You must create * the new netdev in the wiphy's network namespace! Returns the struct * wireless_dev, or an ERR_PTR. For P2P device wdevs, the driver must * also set the address member in the wdev. * * @del_virtual_intf: remove the virtual interface * * @change_virtual_intf: change type/configuration of virtual interface, * keep the struct wireless_dev's iftype updated. * * @add_key: add a key with the given parameters. @mac_addr will be %NULL * when adding a group key. * * @get_key: get information about the key with the given parameters. * @mac_addr will be %NULL when requesting information for a group * key. All pointers given to the @callback function need not be valid * after it returns. This function should return an error if it is * not possible to retrieve the key, -ENOENT if it doesn't exist. * * @del_key: remove a key given the @mac_addr (%NULL for a group key) * and @key_index, return -ENOENT if the key doesn't exist. * * @set_default_key: set the default key on an interface * * @set_default_mgmt_key: set the default management frame key on an interface * * @set_default_beacon_key: set the default Beacon frame key on an interface * * @set_rekey_data: give the data necessary for GTK rekeying to the driver * * @start_ap: Start acting in AP mode defined by the parameters. * @change_beacon: Change the beacon parameters for an access point mode * interface. This should reject the call when AP mode wasn't started. * @stop_ap: Stop being an AP, including stopping beaconing. * * @add_station: Add a new station. * @del_station: Remove a station * @change_station: Modify a given station. Note that flags changes are not much * validated in cfg80211, in particular the auth/assoc/authorized flags * might come to the driver in invalid combinations -- make sure to check * them, also against the existing state! Drivers must call * cfg80211_check_station_change() to validate the information. * @get_station: get station information for the station identified by @mac * @dump_station: dump station callback -- resume dump at index @idx * * @add_mpath: add a fixed mesh path * @del_mpath: delete a given mesh path * @change_mpath: change a given mesh path * @get_mpath: get a mesh path for the given parameters * @dump_mpath: dump mesh path callback -- resume dump at index @idx * @get_mpp: get a mesh proxy path for the given parameters * @dump_mpp: dump mesh proxy path callback -- resume dump at index @idx * @join_mesh: join the mesh network with the specified parameters * (invoked with the wireless_dev mutex held) * @leave_mesh: leave the current mesh network * (invoked with the wireless_dev mutex held) * * @get_mesh_config: Get the current mesh configuration * * @update_mesh_config: Update mesh parameters on a running mesh. * The mask is a bitfield which tells us which parameters to * set, and which to leave alone. * * @change_bss: Modify parameters for a given BSS. * * @set_txq_params: Set TX queue parameters * * @libertas_set_mesh_channel: Only for backward compatibility for libertas, * as it doesn't implement join_mesh and needs to set the channel to * join the mesh instead. * * @set_monitor_channel: Set the monitor mode channel for the device. If other * interfaces are active this callback should reject the configuration. * If no interfaces are active or the device is down, the channel should * be stored for when a monitor interface becomes active. * * @scan: Request to do a scan. If returning zero, the scan request is given * the driver, and will be valid until passed to cfg80211_scan_done(). * For scan results, call cfg80211_inform_bss(); you can call this outside * the scan/scan_done bracket too. * @abort_scan: Tell the driver to abort an ongoing scan. The driver shall * indicate the status of the scan through cfg80211_scan_done(). * * @auth: Request to authenticate with the specified peer * (invoked with the wireless_dev mutex held) * @assoc: Request to (re)associate with the specified peer * (invoked with the wireless_dev mutex held) * @deauth: Request to deauthenticate from the specified peer * (invoked with the wireless_dev mutex held) * @disassoc: Request to disassociate from the specified peer * (invoked with the wireless_dev mutex held) * * @connect: Connect to the ESS with the specified parameters. When connected, * call cfg80211_connect_result()/cfg80211_connect_bss() with status code * %WLAN_STATUS_SUCCESS. If the connection fails for some reason, call * cfg80211_connect_result()/cfg80211_connect_bss() with the status code * from the AP or cfg80211_connect_timeout() if no frame with status code * was received. * The driver is allowed to roam to other BSSes within the ESS when the * other BSS matches the connect parameters. When such roaming is initiated * by the driver, the driver is expected to verify that the target matches * the configured security parameters and to use Reassociation Request * frame instead of Association Request frame. * The connect function can also be used to request the driver to perform a * specific roam when connected to an ESS. In that case, the prev_bssid * parameter is set to the BSSID of the currently associated BSS as an * indication of requesting reassociation. * In both the driver-initiated and new connect() call initiated roaming * cases, the result of roaming is indicated with a call to * cfg80211_roamed(). (invoked with the wireless_dev mutex held) * @update_connect_params: Update the connect parameters while connected to a * BSS. The updated parameters can be used by driver/firmware for * subsequent BSS selection (roaming) decisions and to form the * Authentication/(Re)Association Request frames. This call does not * request an immediate disassociation or reassociation with the current * BSS, i.e., this impacts only subsequent (re)associations. The bits in * changed are defined in &enum cfg80211_connect_params_changed. * (invoked with the wireless_dev mutex held) * @disconnect: Disconnect from the BSS/ESS or stop connection attempts if * connection is in progress. Once done, call cfg80211_disconnected() in * case connection was already established (invoked with the * wireless_dev mutex held), otherwise call cfg80211_connect_timeout(). * * @join_ibss: Join the specified IBSS (or create if necessary). Once done, call * cfg80211_ibss_joined(), also call that function when changing BSSID due * to a merge. * (invoked with the wireless_dev mutex held) * @leave_ibss: Leave the IBSS. * (invoked with the wireless_dev mutex held) * * @set_mcast_rate: Set the specified multicast rate (only if vif is in ADHOC or * MESH mode) * * @set_wiphy_params: Notify that wiphy parameters have changed; * @changed bitfield (see &enum wiphy_params_flags) describes which values * have changed. The actual parameter values are available in * struct wiphy. If returning an error, no value should be changed. * * @set_tx_power: set the transmit power according to the parameters, * the power passed is in mBm, to get dBm use MBM_TO_DBM(). The * wdev may be %NULL if power was set for the wiphy, and will * always be %NULL unless the driver supports per-vif TX power * (as advertised by the nl80211 feature flag.) * @get_tx_power: store the current TX power into the dbm variable; * return 0 if successful * * @set_wds_peer: set the WDS peer for a WDS interface * * @rfkill_poll: polls the hw rfkill line, use cfg80211 reporting * functions to adjust rfkill hw state * * @dump_survey: get site survey information. * * @remain_on_channel: Request the driver to remain awake on the specified * channel for the specified duration to complete an off-channel * operation (e.g., public action frame exchange). When the driver is * ready on the requested channel, it must indicate this with an event * notification by calling cfg80211_ready_on_channel(). * @cancel_remain_on_channel: Cancel an on-going remain-on-channel operation. * This allows the operation to be terminated prior to timeout based on * the duration value. * @mgmt_tx: Transmit a management frame. * @mgmt_tx_cancel_wait: Cancel the wait time from transmitting a management * frame on another channel * * @testmode_cmd: run a test mode command; @wdev may be %NULL * @testmode_dump: Implement a test mode dump. The cb->args[2] and up may be * used by the function, but 0 and 1 must not be touched. Additionally, * return error codes other than -ENOBUFS and -ENOENT will terminate the * dump and return to userspace with an error, so be careful. If any data * was passed in from userspace then the data/len arguments will be present * and point to the data contained in %NL80211_ATTR_TESTDATA. * * @set_bitrate_mask: set the bitrate mask configuration * * @set_pmksa: Cache a PMKID for a BSSID. This is mostly useful for fullmac * devices running firmwares capable of generating the (re) association * RSN IE. It allows for faster roaming between WPA2 BSSIDs. * @del_pmksa: Delete a cached PMKID. * @flush_pmksa: Flush all cached PMKIDs. * @set_power_mgmt: Configure WLAN power management. A timeout value of -1 * allows the driver to adjust the dynamic ps timeout value. * @set_cqm_rssi_config: Configure connection quality monitor RSSI threshold. * After configuration, the driver should (soon) send an event indicating * the current level is above/below the configured threshold; this may * need some care when the configuration is changed (without first being * disabled.) * @set_cqm_rssi_range_config: Configure two RSSI thresholds in the * connection quality monitor. An event is to be sent only when the * signal level is found to be outside the two values. The driver should * set %NL80211_EXT_FEATURE_CQM_RSSI_LIST if this method is implemented. * If it is provided then there's no point providing @set_cqm_rssi_config. * @set_cqm_txe_config: Configure connection quality monitor TX error * thresholds. * @sched_scan_start: Tell the driver to start a scheduled scan. * @sched_scan_stop: Tell the driver to stop an ongoing scheduled scan with * given request id. This call must stop the scheduled scan and be ready * for starting a new one before it returns, i.e. @sched_scan_start may be * called immediately after that again and should not fail in that case. * The driver should not call cfg80211_sched_scan_stopped() for a requested * stop (when this method returns 0). * * @update_mgmt_frame_registrations: Notify the driver that management frame * registrations were updated. The callback is allowed to sleep. * * @set_antenna: Set antenna configuration (tx_ant, rx_ant) on the device. * Parameters are bitmaps of allowed antennas to use for TX/RX. Drivers may * reject TX/RX mask combinations they cannot support by returning -EINVAL * (also see nl80211.h @NL80211_ATTR_WIPHY_ANTENNA_TX). * * @get_antenna: Get current antenna configuration from device (tx_ant, rx_ant). * * @tdls_mgmt: Transmit a TDLS management frame. * @tdls_oper: Perform a high-level TDLS operation (e.g. TDLS link setup). * * @probe_client: probe an associated client, must return a cookie that it * later passes to cfg80211_probe_status(). * * @set_noack_map: Set the NoAck Map for the TIDs. * * @get_channel: Get the current operating channel for the virtual interface. * For monitor interfaces, it should return %NULL unless there's a single * current monitoring channel. * * @start_p2p_device: Start the given P2P device. * @stop_p2p_device: Stop the given P2P device. * * @set_mac_acl: Sets MAC address control list in AP and P2P GO mode. * Parameters include ACL policy, an array of MAC address of stations * and the number of MAC addresses. If there is already a list in driver * this new list replaces the existing one. Driver has to clear its ACL * when number of MAC addresses entries is passed as 0. Drivers which * advertise the support for MAC based ACL have to implement this callback. * * @start_radar_detection: Start radar detection in the driver. * * @end_cac: End running CAC, probably because a related CAC * was finished on another phy. * * @update_ft_ies: Provide updated Fast BSS Transition information to the * driver. If the SME is in the driver/firmware, this information can be * used in building Authentication and Reassociation Request frames. * * @crit_proto_start: Indicates a critical protocol needs more link reliability * for a given duration (milliseconds). The protocol is provided so the * driver can take the most appropriate actions. * @crit_proto_stop: Indicates critical protocol no longer needs increased link * reliability. This operation can not fail. * @set_coalesce: Set coalesce parameters. * * @channel_switch: initiate channel-switch procedure (with CSA). Driver is * responsible for veryfing if the switch is possible. Since this is * inherently tricky driver may decide to disconnect an interface later * with cfg80211_stop_iface(). This doesn't mean driver can accept * everything. It should do it's best to verify requests and reject them * as soon as possible. * * @set_qos_map: Set QoS mapping information to the driver * * @set_ap_chanwidth: Set the AP (including P2P GO) mode channel width for the * given interface This is used e.g. for dynamic HT 20/40 MHz channel width * changes during the lifetime of the BSS. * * @add_tx_ts: validate (if admitted_time is 0) or add a TX TS to the device * with the given parameters; action frame exchange has been handled by * userspace so this just has to modify the TX path to take the TS into * account. * If the admitted time is 0 just validate the parameters to make sure * the session can be created at all; it is valid to just always return * success for that but that may result in inefficient behaviour (handshake * with the peer followed by immediate teardown when the addition is later * rejected) * @del_tx_ts: remove an existing TX TS * * @join_ocb: join the OCB network with the specified parameters * (invoked with the wireless_dev mutex held) * @leave_ocb: leave the current OCB network * (invoked with the wireless_dev mutex held) * * @tdls_channel_switch: Start channel-switching with a TDLS peer. The driver * is responsible for continually initiating channel-switching operations * and returning to the base channel for communication with the AP. * @tdls_cancel_channel_switch: Stop channel-switching with a TDLS peer. Both * peers must be on the base channel when the call completes. * @start_nan: Start the NAN interface. * @stop_nan: Stop the NAN interface. * @add_nan_func: Add a NAN function. Returns negative value on failure. * On success @nan_func ownership is transferred to the driver and * it may access it outside of the scope of this function. The driver * should free the @nan_func when no longer needed by calling * cfg80211_free_nan_func(). * On success the driver should assign an instance_id in the * provided @nan_func. * @del_nan_func: Delete a NAN function. * @nan_change_conf: changes NAN configuration. The changed parameters must * be specified in @changes (using &enum cfg80211_nan_conf_changes); * All other parameters must be ignored. * * @set_multicast_to_unicast: configure multicast to unicast conversion for BSS * * @get_txq_stats: Get TXQ stats for interface or phy. If wdev is %NULL, this * function should return phy stats, and interface stats otherwise. * * @set_pmk: configure the PMK to be used for offloaded 802.1X 4-Way handshake. * If not deleted through @del_pmk the PMK remains valid until disconnect * upon which the driver should clear it. * (invoked with the wireless_dev mutex held) * @del_pmk: delete the previously configured PMK for the given authenticator. * (invoked with the wireless_dev mutex held) * * @external_auth: indicates result of offloaded authentication processing from * user space * * @tx_control_port: TX a control port frame (EAPoL). The noencrypt parameter * tells the driver that the frame should not be encrypted. * * @get_ftm_responder_stats: Retrieve FTM responder statistics, if available. * Statistics should be cumulative, currently no way to reset is provided. * @start_pmsr: start peer measurement (e.g. FTM) * @abort_pmsr: abort peer measurement * * @update_owe_info: Provide updated OWE info to driver. Driver implementing SME * but offloading OWE processing to the user space will get the updated * DH IE through this interface. * * @probe_mesh_link: Probe direct Mesh peer's link quality by sending data frame * and overrule HWMP path selection algorithm. * @set_tid_config: TID specific configuration, this can be peer or BSS specific * This callback may sleep. * @reset_tid_config: Reset TID specific configuration for the peer, for the * given TIDs. This callback may sleep. */ struct cfg80211_ops { int (*suspend)(struct wiphy *wiphy, struct cfg80211_wowlan *wow); int (*resume)(struct wiphy *wiphy); void (*set_wakeup)(struct wiphy *wiphy, bool enabled); struct wireless_dev * (*add_virtual_intf)(struct wiphy *wiphy, const char *name, unsigned char name_assign_type, enum nl80211_iftype type, struct vif_params *params); int (*del_virtual_intf)(struct wiphy *wiphy, struct wireless_dev *wdev); int (*change_virtual_intf)(struct wiphy *wiphy, struct net_device *dev, enum nl80211_iftype type, struct vif_params *params); int (*add_key)(struct wiphy *wiphy, struct net_device *netdev, u8 key_index, bool pairwise, const u8 *mac_addr, struct key_params *params); int (*get_key)(struct wiphy *wiphy, struct net_device *netdev, u8 key_index, bool pairwise, const u8 *mac_addr, void *cookie, void (*callback)(void *cookie, struct key_params*)); int (*del_key)(struct wiphy *wiphy, struct net_device *netdev, u8 key_index, bool pairwise, const u8 *mac_addr); int (*set_default_key)(struct wiphy *wiphy, struct net_device *netdev, u8 key_index, bool unicast, bool multicast); int (*set_default_mgmt_key)(struct wiphy *wiphy, struct net_device *netdev, u8 key_index); int (*set_default_beacon_key)(struct wiphy *wiphy, struct net_device *netdev, u8 key_index); int (*start_ap)(struct wiphy *wiphy, struct net_device *dev, struct cfg80211_ap_settings *settings); int (*change_beacon)(struct wiphy *wiphy, struct net_device *dev, struct cfg80211_beacon_data *info); int (*stop_ap)(struct wiphy *wiphy, struct net_device *dev); int (*add_station)(struct wiphy *wiphy, struct net_device *dev, const u8 *mac, struct station_parameters *params); int (*del_station)(struct wiphy *wiphy, struct net_device *dev, struct station_del_parameters *params); int (*change_station)(struct wiphy *wiphy, struct net_device *dev, const u8 *mac, struct station_parameters *params); int (*get_station)(struct wiphy *wiphy, struct net_device *dev, const u8 *mac, struct station_info *sinfo); int (*dump_station)(struct wiphy *wiphy, struct net_device *dev, int idx, u8 *mac, struct station_info *sinfo); int (*add_mpath)(struct wiphy *wiphy, struct net_device *dev, const u8 *dst, const u8 *next_hop); int (*del_mpath)(struct wiphy *wiphy, struct net_device *dev, const u8 *dst); int (*change_mpath)(struct wiphy *wiphy, struct net_device *dev, const u8 *dst, const u8 *next_hop); int (*get_mpath)(struct wiphy *wiphy, struct net_device *dev, u8 *dst, u8 *next_hop, struct mpath_info *pinfo); int (*dump_mpath)(struct wiphy *wiphy, struct net_device *dev, int idx, u8 *dst, u8 *next_hop, struct mpath_info *pinfo); int (*get_mpp)(struct wiphy *wiphy, struct net_device *dev, u8 *dst, u8 *mpp, struct mpath_info *pinfo); int (*dump_mpp)(struct wiphy *wiphy, struct net_device *dev, int idx, u8 *dst, u8 *mpp, struct mpath_info *pinfo); int (*get_mesh_config)(struct wiphy *wiphy, struct net_device *dev, struct mesh_config *conf); int (*update_mesh_config)(struct wiphy *wiphy, struct net_device *dev, u32 mask, const struct mesh_config *nconf); int (*join_mesh)(struct wiphy *wiphy, struct net_device *dev, const struct mesh_config *conf, const struct mesh_setup *setup); int (*leave_mesh)(struct wiphy *wiphy, struct net_device *dev); int (*join_ocb)(struct wiphy *wiphy, struct net_device *dev, struct ocb_setup *setup); int (*leave_ocb)(struct wiphy *wiphy, struct net_device *dev); int (*change_bss)(struct wiphy *wiphy, struct net_device *dev, struct bss_parameters *params); int (*set_txq_params)(struct wiphy *wiphy, struct net_device *dev, struct ieee80211_txq_params *params); int (*libertas_set_mesh_channel)(struct wiphy *wiphy, struct net_device *dev, struct ieee80211_channel *chan); int (*set_monitor_channel)(struct wiphy *wiphy, struct cfg80211_chan_def *chandef); int (*scan)(struct wiphy *wiphy, struct cfg80211_scan_request *request); void (*abort_scan)(struct wiphy *wiphy, struct wireless_dev *wdev); int (*auth)(struct wiphy *wiphy, struct net_device *dev, struct cfg80211_auth_request *req); int (*assoc)(struct wiphy *wiphy, struct net_device *dev, struct cfg80211_assoc_request *req); int (*deauth)(struct wiphy *wiphy, struct net_device *dev, struct cfg80211_deauth_request *req); int (*disassoc)(struct wiphy *wiphy, struct net_device *dev, struct cfg80211_disassoc_request *req); int (*connect)(struct wiphy *wiphy, struct net_device *dev, struct cfg80211_connect_params *sme); int (*update_connect_params)(struct wiphy *wiphy, struct net_device *dev, struct cfg80211_connect_params *sme, u32 changed); int (*disconnect)(struct wiphy *wiphy, struct net_device *dev, u16 reason_code); int (*join_ibss)(struct wiphy *wiphy, struct net_device *dev, struct cfg80211_ibss_params *params); int (*leave_ibss)(struct wiphy *wiphy, struct net_device *dev); int (*set_mcast_rate)(struct wiphy *wiphy, struct net_device *dev, int rate[NUM_NL80211_BANDS]); int (*set_wiphy_params)(struct wiphy *wiphy, u32 changed); int (*set_tx_power)(struct wiphy *wiphy, struct wireless_dev *wdev, enum nl80211_tx_power_setting type, int mbm); int (*get_tx_power)(struct wiphy *wiphy, struct wireless_dev *wdev, int *dbm); int (*set_wds_peer)(struct wiphy *wiphy, struct net_device *dev, const u8 *addr); void (*rfkill_poll)(struct wiphy *wiphy); #ifdef CONFIG_NL80211_TESTMODE int (*testmode_cmd)(struct wiphy *wiphy, struct wireless_dev *wdev, void *data, int len); int (*testmode_dump)(struct wiphy *wiphy, struct sk_buff *skb, struct netlink_callback *cb, void *data, int len); #endif int (*set_bitrate_mask)(struct wiphy *wiphy, struct net_device *dev, const u8 *peer, const struct cfg80211_bitrate_mask *mask); int (*dump_survey)(struct wiphy *wiphy, struct net_device *netdev, int idx, struct survey_info *info); int (*set_pmksa)(struct wiphy *wiphy, struct net_device *netdev, struct cfg80211_pmksa *pmksa); int (*del_pmksa)(struct wiphy *wiphy, struct net_device *netdev, struct cfg80211_pmksa *pmksa); int (*flush_pmksa)(struct wiphy *wiphy, struct net_device *netdev); int (*remain_on_channel)(struct wiphy *wiphy, struct wireless_dev *wdev, struct ieee80211_channel *chan, unsigned int duration, u64 *cookie); int (*cancel_remain_on_channel)(struct wiphy *wiphy, struct wireless_dev *wdev, u64 cookie); int (*mgmt_tx)(struct wiphy *wiphy, struct wireless_dev *wdev, struct cfg80211_mgmt_tx_params *params, u64 *cookie); int (*mgmt_tx_cancel_wait)(struct wiphy *wiphy, struct wireless_dev *wdev, u64 cookie); int (*set_power_mgmt)(struct wiphy *wiphy, struct net_device *dev, bool enabled, int timeout); int (*set_cqm_rssi_config)(struct wiphy *wiphy, struct net_device *dev, s32 rssi_thold, u32 rssi_hyst); int (*set_c