1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946 947 948 949 950 951 952 953 954 955 956 957 958 959 960 961 962 963 964 965 966 967 968 969 970 971 972 973 974 975 976 977 978 979 980 981 982 983 984 985 986 987 988 989 990 991 992 993 994 995 996 997 998 999 1000 1001 1002 1003 1004 1005 1006 1007 1008 1009 1010 1011 1012 1013 1014 1015 1016 1017 1018 1019 1020 1021 1022 1023 1024 1025 1026 1027 1028 1029 1030 1031 1032 1033 1034 1035 1036 1037 1038 1039 1040 1041 1042 1043 1044 1045 1046 1047 1048 1049 1050 1051 1052 1053 1054 1055 1056 1057 1058 1059 1060 1061 1062 1063 1064 1065 1066 1067 1068 1069 1070 1071 1072 1073 1074 1075 1076 1077 1078 1079 1080 1081 1082 1083 1084 1085 1086 1087 1088 1089 1090 1091 1092 1093 1094 1095 1096 1097 1098 1099 1100 1101 1102 1103 1104 1105 1106 1107 1108 1109 1110 1111 1112 1113 1114 1115 1116 1117 1118 1119 1120 1121 1122 1123 1124 1125 1126 1127 1128 1129 1130 1131 1132 1133 1134 1135 1136 1137 1138 1139 1140 1141 1142 1143 1144 1145 1146 1147 1148 1149 1150 1151 1152 1153 1154 1155 1156 1157 1158 1159 1160 1161 1162 1163 1164 1165 1166 1167 1168 1169 1170 1171 1172 1173 1174 1175 1176 1177 1178 1179 1180 1181 1182 1183 1184 1185 1186 1187 1188 1189 1190 1191 1192 1193 1194 1195 1196 1197 1198 1199 1200 1201 1202 1203 1204 1205 1206 1207 1208 1209 1210 1211 1212 1213 1214 1215 1216 1217 1218 1219 1220 1221 1222 1223 1224 1225 1226 1227 1228 1229 1230 1231 1232 1233 1234 1235 1236 1237 1238 1239 1240 1241 1242 1243 1244 1245 1246 1247 1248 1249 1250 1251 1252 1253 1254 1255 1256 1257 1258 1259 1260 1261 1262 1263 1264 1265 1266 1267 1268 1269 1270 1271 1272 1273 1274 1275 1276 1277 1278 1279 1280 1281 1282 1283 1284 1285 1286 1287 1288 1289 1290 1291 1292 1293 1294 1295 1296 1297 1298 1299 1300 1301 1302 1303 1304 1305 1306 1307 1308 1309 1310 1311 1312 1313 1314 1315 1316 1317 1318 1319 1320 1321 1322 1323 1324 1325 1326 1327 1328 1329 1330 1331 1332 1333 1334 1335 1336 1337 1338 1339 1340 1341 1342 1343 1344 1345 1346 1347 1348 1349 1350 1351 1352 1353 1354 1355 1356 1357 1358 1359 1360 1361 1362 1363 1364 1365 1366 1367 1368 1369 1370 1371 1372 1373 1374 1375 1376 1377 1378 1379 1380 1381 1382 1383 1384 1385 1386 1387 1388 1389 1390 1391 1392 1393 1394 1395 1396 1397 1398 1399 1400 1401 1402 1403 1404 1405 1406 1407 1408 1409 1410 1411 1412 1413 1414 1415 1416 1417 1418 1419 1420 1421 1422 1423 1424 1425 1426 1427 1428 1429 1430 1431 1432 1433 1434 1435 1436 1437 1438 1439 1440 1441 1442 1443 1444 1445 1446 1447 1448 1449 1450 1451 1452 1453 1454 1455 1456 1457 1458 1459 1460 1461 1462 1463 1464 1465 1466 1467 1468 1469 1470 1471 1472 1473 1474 1475 1476 1477 1478 1479 1480 1481 1482 1483 1484 1485 1486 1487 1488 1489 1490 1491 1492 1493 1494 1495 1496 1497 1498 1499 1500 1501 1502 1503 1504 1505 1506 1507 1508 1509 1510 1511 1512 1513 1514 1515 1516 1517 1518 1519 1520 1521 1522 1523 1524 1525 1526 1527 1528 1529 1530 1531 1532 1533 1534 1535 1536 1537 1538 1539 1540 1541 1542 1543 1544 1545 1546 1547 1548 1549 1550 1551 1552 1553 1554 1555 1556 1557 1558 1559 1560 1561 1562 1563 1564 1565 1566 1567 1568 1569 1570 1571 1572 1573 1574 1575 1576 1577 1578 1579 1580 1581 1582 1583 1584 1585 1586 1587 1588 1589 1590 1591 1592 1593 1594 1595 1596 1597 1598 1599 1600 1601 1602 1603 1604 1605 1606 1607 1608 1609 1610 1611 1612 1613 1614 1615 1616 1617 1618 1619 1620 1621 1622 1623 1624 1625 1626 1627 1628 1629 1630 1631 1632 1633 1634 1635 1636 1637 1638 1639 1640 1641 1642 1643 1644 1645 1646 1647 1648 1649 1650 1651 1652 1653 1654 1655 1656 1657 1658 1659 1660 1661 1662 1663 1664 1665 1666 1667 1668 1669 1670 1671 1672 1673 1674 1675 1676 1677 1678 1679 1680 1681 1682 1683 1684 1685 1686 1687 1688 1689 1690 1691 1692 1693 1694 1695 1696 1697 1698 1699 1700 1701 1702 1703 1704 1705 1706 1707 1708 1709 1710 1711 1712 1713 1714 1715 1716 1717 1718 1719 1720 1721 1722 1723 1724 1725 1726 1727 1728 1729 1730 1731 1732 1733 1734 1735 1736 1737 1738 1739 1740 1741 1742 1743 1744 1745 1746 1747 1748 1749 1750 1751 1752 1753 1754 1755 1756 1757 1758 1759 1760 1761 1762 1763 1764 1765 1766 1767 1768 1769 1770 1771 1772 1773 1774 1775 1776 1777 1778 1779 1780 1781 1782 1783 1784 1785 1786 1787 1788 1789 1790 1791 1792 1793 1794 1795 1796 1797 1798 1799 1800 1801 1802 1803 1804 1805 1806 1807 1808 1809 1810 1811 1812 1813 1814 1815 1816 1817 1818 1819 1820 1821 1822 1823 1824 1825 1826 1827 1828 1829 1830 1831 1832 1833 1834 1835 1836 1837 1838 1839 1840 1841 1842 1843 1844 1845 1846 1847 1848 1849 1850 1851 1852 1853 1854 1855 1856 1857 1858 1859 1860 1861 1862 1863 1864 1865 1866 1867 1868 1869 1870 1871 1872 1873 1874 1875 1876 1877 1878 1879 1880 1881 1882 1883 1884 1885 1886 1887 1888 1889 1890 1891 1892 1893 1894 1895 1896 1897 1898 1899 1900 1901 1902 1903 1904 1905 1906 1907 1908 1909 1910 1911 1912 1913 1914 1915 1916 1917 1918 1919 1920 1921 1922 1923 1924 1925 1926 1927 1928 1929 1930 1931 1932 1933 1934 1935 1936 1937 1938 1939 1940 1941 1942 1943 1944 1945 1946 1947 1948 1949 1950 1951 1952 1953 1954 1955 1956 1957 1958 1959 1960 1961 1962 1963 1964 1965 1966 1967 1968 1969 1970 1971 1972 1973 1974 1975 1976 1977 1978 1979 1980 1981 1982 1983 1984 1985 1986 1987 1988 1989 1990 1991 1992 1993 1994 1995 1996 1997 1998 1999 2000 2001 2002 2003 2004 2005 2006 2007 2008 2009 2010 2011 2012 2013 2014 2015 2016 2017 2018 2019 2020 2021 2022 2023 2024 2025 2026 2027 2028 2029 2030 2031 2032 2033 2034 2035 2036 2037 2038 2039 2040 2041 2042 2043 2044 2045 2046 2047 2048 2049 2050 2051 2052 2053 2054 2055 2056 2057 2058 2059 2060 2061 2062 2063 2064 2065 2066 2067 2068 2069 2070 2071 2072 2073 2074 2075 2076 2077 2078 2079 2080 2081 2082 2083 2084 2085 2086 2087 2088 2089 2090 2091 2092 2093 2094 2095 2096 2097 2098 2099 2100 2101 2102 2103 2104 2105 2106 2107 2108 2109 2110 2111 2112 2113 2114 2115 2116 2117 2118 2119 2120 2121 2122 2123 2124 2125 2126 2127 2128 2129 2130 2131 2132 2133 2134 2135 2136 2137 2138 2139 2140 2141 2142 2143 2144 2145 2146 2147 2148 2149 2150 2151 2152 2153 2154 2155 2156 2157 2158 2159 2160 2161 2162 2163 2164 2165 2166 2167 2168 2169 2170 2171 2172 2173 2174 2175 2176 2177 2178 2179 2180 2181 2182 2183 2184 2185 2186 2187 2188 2189 2190 2191 2192 2193 2194 2195 2196 2197 2198 2199 2200 2201 2202 2203 2204 2205 2206 2207 2208 2209 2210 2211 2212 2213 2214 2215 2216 2217 2218 2219 2220 2221 2222 2223 2224 2225 2226 2227 2228 2229 2230 2231 2232 2233 2234 2235 2236 2237 2238 2239 2240 2241 2242 2243 2244 2245 2246 2247 2248 2249 2250 2251 2252 2253 2254 2255 2256 2257 2258 2259 2260 2261 2262 2263 2264 2265 2266 2267 2268 2269 2270 2271 2272 2273 2274 2275 2276 2277 2278 2279 2280 2281 2282 2283 2284 2285 2286 2287 2288 2289 2290 2291 2292 2293 2294 2295 2296 2297 2298 2299 2300 2301 2302 2303 2304 2305 2306 2307 2308 2309 2310 2311 2312 2313 2314 2315 2316 2317 2318 2319 2320 2321 2322 2323 2324 2325 2326 2327 2328 2329 2330 2331 2332 2333 2334 2335 2336 2337 2338 2339 2340 2341 2342 2343 2344 2345 2346 2347 2348 2349 2350 2351 2352 2353 2354 2355 2356 2357 2358 2359 2360 2361 2362 2363 2364 2365 2366 2367 2368 2369 2370 2371 2372 2373 2374 2375 2376 2377 2378 2379 2380 2381 2382 2383 2384 2385 2386 2387 2388 2389 2390 2391 2392 2393 2394 2395 2396 2397 2398 2399 2400 2401 2402 2403 2404 2405 2406 2407 2408 2409 2410 2411 2412 2413 2414 2415 2416 2417 2418 2419 2420 2421 2422 2423 2424 2425 2426 2427 2428 2429 2430 2431 2432 2433 2434 2435 2436 2437 2438 2439 2440 2441 2442 2443 2444 2445 2446 2447 2448 2449 2450 2451 2452 2453 2454 2455 2456 2457 2458 2459 2460 2461 2462 2463 2464 2465 2466 2467 2468 2469 2470 2471 2472 2473 2474 2475 2476 2477 2478 2479 2480 2481 2482 2483 2484 2485 2486 2487 2488 2489 2490 2491 2492 2493 2494 2495 2496 2497 2498 2499 2500 2501 2502 2503 2504 2505 2506 2507 2508 2509 2510 2511 2512 2513 2514 2515 2516 2517 2518 2519 2520 2521 2522 2523 2524 2525 2526 2527 2528 2529 2530 2531 2532 2533 2534 2535 2536 2537 2538 2539 2540 2541 2542 2543 2544 2545 2546 2547 2548 2549 2550 2551 2552 2553 2554 2555 2556 2557 2558 2559 2560 2561 2562 2563 2564 2565 2566 2567 2568 2569 2570 2571 2572 2573 2574 2575 2576 2577 2578 2579 2580 2581 2582 2583 2584 2585 2586 2587 2588 2589 2590 2591 2592 2593 2594 2595 2596 2597 2598 2599 2600 2601 2602 2603 2604 2605 2606 2607 2608 2609 2610 2611 2612 2613 2614 2615 2616 2617 2618 2619 2620 2621 2622 2623 2624 2625 2626 2627 2628 2629 2630 2631 2632 2633 2634 2635 2636 2637 2638 2639 2640 2641 2642 2643 2644 2645 2646 2647 2648 2649 2650 2651 2652 2653 2654 2655 2656 2657 2658 2659 2660 2661 2662 2663 2664 2665 2666 2667 2668 2669 2670 2671 2672 2673 2674 2675 2676 2677 2678 2679 2680 2681 2682 2683 2684 2685 2686 2687 2688 2689 2690 2691 2692 2693 2694 2695 2696 2697 2698 2699 2700 2701 2702 2703 2704 2705 2706 2707 2708 2709 2710 2711 2712 2713 2714 2715 2716 2717 2718 2719 2720 2721 2722 2723 2724 2725 2726 2727 2728 2729 2730 2731 2732 2733 2734 2735 2736 2737 2738 2739 2740 2741 2742 2743 2744 2745 2746 2747 2748 2749 2750 2751 2752 2753 2754 2755 2756 2757 2758 2759 2760 2761 2762 2763 2764 2765 2766 2767 2768 2769 2770 2771 2772 2773 2774 2775 2776 2777 2778 2779 2780 2781 2782 2783 2784 2785 2786 2787 2788 2789 2790 2791 2792 2793 2794 2795 2796 2797 2798 2799 2800 2801 2802 2803 2804 2805 2806 2807 2808 2809 2810 2811 2812 2813 2814 2815 2816 2817 2818 2819 2820 2821 2822 2823 2824 2825 2826 2827 2828 2829 2830 2831 2832 2833 2834 2835 2836 2837 2838 2839 2840 2841 2842 2843 2844 2845 2846 2847 2848 2849 2850 2851 2852 2853 2854 2855 2856 2857 2858 2859 2860 2861 2862 2863 2864 2865 2866 2867 2868 2869 2870 2871 2872 2873 2874 2875 2876 2877 2878 2879 2880 2881 2882 2883 2884 2885 2886 2887 2888 2889 2890 2891 2892 2893 2894 2895 2896 2897 2898 2899 2900 2901 2902 2903 2904 2905 2906 2907 2908 2909 2910 2911 2912 2913 2914 2915 2916 2917 2918 2919 2920 2921 2922 2923 2924 2925 2926 2927 2928 2929 2930 2931 2932 2933 2934 2935 2936 2937 2938 2939 2940 2941 2942 2943 2944 2945 2946 2947 2948 2949 2950 2951 2952 2953 2954 2955 2956 2957 2958 2959 2960 2961 2962 2963 2964 2965 2966 2967 2968 2969 2970 2971 2972 2973 2974 2975 2976 2977 2978 2979 2980 2981 2982 2983 2984 2985 2986 2987 2988 2989 2990 2991 2992 2993 2994 2995 2996 2997 2998 2999 3000 3001 3002 3003 3004 3005 3006 3007 3008 3009 3010 3011 3012 3013 3014 3015 3016 3017 3018 3019 3020 3021 3022 3023 3024 3025 3026 3027 3028 3029 3030 3031 3032 3033 3034 3035 3036 3037 3038 3039 3040 3041 3042 3043 3044 3045 3046 3047 3048 3049 3050 3051 3052 3053 3054 3055 3056 3057 3058 3059 3060 3061 3062 3063 3064 3065 3066 3067 3068 3069 3070 3071 3072 3073 3074 3075 3076 3077 3078 3079 3080 3081 3082 3083 3084 3085 3086 3087 3088 3089 3090 3091 3092 3093 3094 3095 3096 3097 3098 3099 3100 3101 3102 3103 3104 3105 3106 3107 3108 3109 3110 3111 3112 3113 3114 3115 3116 3117 3118 3119 3120 3121 3122 3123 3124 3125 3126 3127 3128 3129 3130 3131 3132 3133 3134 3135 3136 3137 3138 3139 3140 3141 3142 3143 3144 3145 3146 3147 3148 3149 3150 3151 3152 3153 3154 3155 3156 3157 3158 3159 3160 3161 3162 3163 3164 3165 3166 3167 3168 3169 3170 3171 3172 3173 3174 3175 3176 3177 3178 3179 3180 3181 3182 3183 3184 3185 3186 3187 3188 3189 3190 3191 3192 3193 3194 3195 3196 3197 3198 3199 3200 3201 3202 3203 3204 3205 3206 3207 3208 3209 3210 3211 3212 3213 3214 3215 3216 3217 3218 3219 3220 3221 3222 3223 3224 3225 3226 3227 3228 3229 3230 3231 3232 3233 3234 3235 3236 3237 3238 3239 3240 3241 3242 3243 3244 3245 3246 3247 3248 3249 3250 3251 3252 3253 3254 3255 3256 3257 3258 3259 3260 3261 3262 3263 3264 3265 3266 3267 3268 3269 3270 3271 3272 3273 3274 3275 3276 3277 3278 3279 3280 3281 3282 3283 3284 3285 3286 3287 3288 3289 3290 3291 3292 3293 3294 3295 3296 3297 3298 3299 3300 3301 3302 3303 3304 3305 3306 3307 3308 3309 3310 3311 3312 3313 3314 3315 3316 3317 3318 3319 3320 3321 3322 3323 3324 3325 3326 3327 3328 3329 3330 3331 3332 3333 3334 3335 3336 3337 3338 3339 3340 3341 3342 3343 3344 3345 3346 3347 3348 3349 3350 3351 3352 3353 3354 3355 3356 3357 3358 3359 3360 3361 3362 3363 3364 3365 3366 3367 3368 3369 3370 3371 3372 3373 3374 3375 3376 3377 3378 3379 3380 3381 3382 3383 3384 3385 3386 3387 3388 3389 3390 3391 3392 3393 3394 3395 3396 3397 3398 3399 3400 3401 3402 3403 3404 3405 3406 3407 3408 3409 3410 3411 3412 3413 3414 3415 3416 3417 3418 3419 3420 3421 3422 3423 3424 3425 3426 3427 3428 3429 3430 3431 3432 3433 3434 3435 3436 3437 3438 3439 3440 3441 3442 3443 3444 3445 3446 3447 3448 3449 3450 3451 3452 3453 3454 3455 3456 3457 3458 3459 3460 3461 3462 3463 3464 3465 3466 3467 3468 3469 3470 3471 3472 3473 3474 3475 3476 3477 3478 3479 3480 3481 3482 3483 3484 3485 3486 3487 3488 3489 3490 3491 3492 3493 3494 3495 3496 3497 3498 3499 3500 3501 3502 3503 3504 3505 3506 3507 3508 3509 3510 3511 3512 3513 3514 3515 3516 3517 3518 3519 3520 3521 3522 3523 3524 3525 3526 3527 3528 3529 3530 3531 3532 3533 3534 3535 3536 3537 3538 3539 3540 3541 3542 3543 3544 3545 3546 3547 3548 3549 3550 3551 3552 3553 3554 3555 3556 3557 3558 3559 3560 3561 3562 3563 3564 3565 3566 3567 3568 3569 3570 3571 3572 3573 3574 3575 3576 3577 3578 3579 3580 3581 3582 3583 3584 3585 3586 3587 3588 3589 3590 3591 3592 3593 3594 3595 3596 3597 3598 3599 3600 3601 3602 3603 3604 3605 3606 3607 3608 3609 3610 3611 3612 3613 3614 3615 3616 3617 3618 3619 3620 3621 3622 3623 3624 3625 3626 3627 3628 3629 3630 3631 3632 3633 3634 3635 3636 3637 3638 3639 3640 3641 3642 3643 3644 3645 3646 3647 3648 3649 3650 3651 3652 3653 3654 3655 3656 3657 3658 3659 3660 3661 3662 3663 3664 3665 3666 3667 3668 3669 3670 3671 3672 3673 3674 3675 3676 3677 3678 3679 3680 3681 3682 3683 3684 3685 3686 3687 3688 3689 3690 3691 3692 3693 3694 3695 3696 3697 3698 3699 3700 3701 3702 3703 3704 3705 3706 3707 3708 3709 3710 3711 3712 3713 3714 3715 3716 3717 3718 3719 3720 3721 3722 3723 3724 3725 3726 3727 3728 3729 3730 3731 3732 3733 3734 3735 3736 3737 3738 3739 3740 3741 3742 3743 3744 3745 3746 3747 3748 3749 3750 3751 3752 3753 3754 3755 3756 3757 3758 3759 3760 3761 3762 3763 3764 3765 3766 3767 3768 3769 3770 3771 3772 3773 3774 3775 3776 3777 3778 3779 3780 3781 3782 3783 3784 3785 3786 3787 3788 3789 3790 3791 3792 3793 3794 3795 3796 3797 3798 3799 3800 3801 3802 3803 3804 3805 3806 3807 3808 3809 3810 3811 3812 3813 3814 3815 3816 3817 3818 3819 3820 3821 3822 3823 3824 3825 3826 3827 3828 3829 3830 3831 3832 3833 3834 3835 3836 3837 3838 3839 3840 3841 3842 3843 3844 3845 3846 3847 3848 3849 3850 3851 3852 3853 3854 3855 3856 3857 3858 3859 3860 3861 3862 3863 3864 3865 3866 3867 3868 3869 3870 3871 3872 3873 3874 3875 3876 3877 3878 3879 3880 3881 3882 3883 3884 3885 3886 3887 3888 3889 3890 3891 3892 3893 3894 3895 3896 3897 3898 3899 3900 3901 3902 3903 3904 3905 3906 3907 3908 3909 3910 3911 3912 3913 3914 3915 3916 3917 3918 3919 3920 3921 3922 3923 3924 3925 3926 3927 3928 3929 3930 3931 3932 3933 3934 3935 3936 3937 3938 3939 3940 3941 3942 3943 3944 3945 3946 3947 3948 3949 3950 3951 3952 3953 3954 3955 3956 3957 3958 3959 3960 3961 3962 3963 3964 3965 3966 3967 3968 3969 3970 3971 3972 3973 3974 3975 3976 3977 3978 3979 3980 3981 3982 3983 3984 3985 3986 3987 3988 3989 3990 3991 3992 3993 3994 3995 3996 3997 3998 3999 4000 4001 4002 4003 4004 4005 4006 4007 4008 4009 4010 4011 4012 4013 4014 4015 4016 4017 4018 4019 4020 4021 4022 4023 4024 4025 4026 4027 4028 4029 4030 4031 4032 4033 4034 4035 4036 4037 4038 4039 4040 4041 4042 4043 4044 4045 4046 4047 4048 4049 4050 4051 4052 4053 4054 4055 4056 4057 4058 4059 4060 4061 4062 4063 4064 4065 4066 4067 4068 4069 4070 4071 4072 4073 4074 4075 4076 4077 4078 4079 4080 4081 4082 4083 4084 4085 4086 4087 4088 4089 4090 4091 4092 4093 4094 4095 4096 4097 4098 4099 4100 4101 4102 4103 4104 4105 4106 4107 4108 4109 4110 4111 4112 4113 4114 4115 4116 4117 4118 4119 4120 4121 4122 4123 4124 4125 4126 4127 4128 4129 4130 4131 4132 4133 4134 4135 4136 4137 4138 4139 4140 4141 4142 4143 4144 4145 4146 4147 4148 4149 4150 4151 4152 4153 4154 4155 4156 4157 4158 4159 4160 4161 4162 4163 4164 4165 4166 4167 4168 4169 4170 4171 4172 4173 4174 4175 4176 4177 4178 4179 4180 4181 4182 4183 4184 4185 4186 4187 4188 4189 4190 4191 4192 4193 4194 4195 4196 4197 4198 4199 4200 4201 4202 4203 4204 4205 4206 4207 4208 4209 4210 4211 4212 4213 4214 4215 4216 4217 4218 4219 4220 4221 4222 4223 4224 4225 4226 4227 4228 4229 4230 4231 4232 4233 4234 4235 4236 4237 4238 4239 4240 4241 4242 4243 4244 4245 4246 4247 4248 4249 4250 4251 4252 4253 4254 4255 4256 4257 4258 4259 4260 4261 4262 4263 4264 4265 4266 4267 4268 4269 4270 4271 4272 4273 4274 4275 4276 4277 4278 4279 4280 4281 4282 4283 4284 4285 4286 4287 4288 4289 4290 4291 4292 4293 4294 4295 4296 4297 4298 4299 4300 4301 4302 4303 4304 4305 4306 4307 4308 4309 4310 4311 4312 4313 4314 4315 4316 4317 4318 4319 4320 4321 4322 4323 4324 4325 4326 4327 4328 4329 4330 4331 4332 4333 4334 4335 4336 4337 4338 4339 4340 4341 4342 4343 4344 4345 4346 4347 4348 4349 4350 4351 4352 4353 4354 4355 4356 4357 4358 4359 4360 4361 4362 4363 4364 4365 4366 4367 4368 4369 4370 4371 4372 4373 4374 4375 4376 4377 4378 4379 4380 4381 4382 4383 4384 4385 4386 4387 4388 4389 4390 4391 4392 4393 4394 4395 4396 4397 4398 4399 4400 4401 4402 4403 4404 4405 4406 4407 4408 4409 4410 4411 4412 4413 4414 4415 4416 4417 4418 4419 4420 4421 4422 4423 4424 4425 4426 4427 4428 4429 4430 4431 4432 4433 4434 4435 4436 4437 4438 4439 4440 4441 4442 4443 4444 4445 4446 4447 4448 4449 4450 4451 4452 4453 4454 4455 4456 4457 4458 4459 4460 4461 4462 4463 4464 4465 4466 4467 4468 4469 4470 4471 4472 4473 4474 4475 4476 4477 4478 4479 4480 4481 4482 4483 4484 4485 4486 4487 4488 4489 4490 4491 4492 4493 4494 4495 4496 4497 4498 4499 4500 4501 4502 4503 4504 4505 4506 4507 4508 4509 4510 4511 4512 4513 4514 4515 4516 4517 4518 4519 4520 4521 4522 4523 4524 4525 4526 4527 4528 4529 4530 4531 4532 4533 4534 4535 4536 4537 4538 4539 4540 4541 4542 4543 4544 4545 4546 4547 4548 4549 4550 4551 4552 4553 4554 4555 4556 4557 4558 4559 4560 4561 4562 4563 4564 4565 4566 4567 4568 4569 4570 4571 4572 4573 4574 4575 4576 4577 4578 4579 4580 4581 4582 4583 4584 4585 4586 4587 4588 4589 4590 4591 4592 4593 4594 4595 4596 4597 4598 4599 4600 4601 4602 4603 4604 4605 4606 4607 4608 4609 4610 4611 4612 4613 4614 4615 4616 4617 4618 4619 4620 4621 4622 4623 4624 4625 4626 4627 4628 4629 4630 4631 4632 4633 4634 4635 4636 4637 4638 4639 4640 4641 4642 4643 4644 4645 4646 4647 4648 4649 4650 4651 4652 4653 4654 4655 4656 4657 4658 4659 4660 4661 4662 // SPDX-License-Identifier: GPL-2.0-or-later /* * libata-scsi.c - helper library for ATA * * Copyright 2003-2004 Red Hat, Inc. All rights reserved. * Copyright 2003-2004 Jeff Garzik * * libata documentation is available via 'make {ps|pdf}docs', * as Documentation/driver-api/libata.rst * * Hardware documentation available from * - http://www.t10.org/ * - http://www.t13.org/ */ #include <linux/compat.h> #include <linux/slab.h> #include <linux/kernel.h> #include <linux/blkdev.h> #include <linux/spinlock.h> #include <linux/export.h> #include <scsi/scsi.h> #include <scsi/scsi_host.h> #include <scsi/scsi_cmnd.h> #include <scsi/scsi_eh.h> #include <scsi/scsi_device.h> #include <scsi/scsi_tcq.h> #include <scsi/scsi_transport.h> #include <linux/libata.h> #include <linux/hdreg.h> #include <linux/uaccess.h> #include <linux/suspend.h> #include <asm/unaligned.h> #include <linux/ioprio.h> #include <linux/of.h> #include "libata.h" #include "libata-transport.h" #define ATA_SCSI_RBUF_SIZE 576 static DEFINE_SPINLOCK(ata_scsi_rbuf_lock); static u8 ata_scsi_rbuf[ATA_SCSI_RBUF_SIZE]; typedef unsigned int (*ata_xlat_func_t)(struct ata_queued_cmd *qc); static struct ata_device *__ata_scsi_find_dev(struct ata_port *ap, const struct scsi_device *scsidev); #define RW_RECOVERY_MPAGE 0x1 #define RW_RECOVERY_MPAGE_LEN 12 #define CACHE_MPAGE 0x8 #define CACHE_MPAGE_LEN 20 #define CONTROL_MPAGE 0xa #define CONTROL_MPAGE_LEN 12 #define ALL_MPAGES 0x3f #define ALL_SUB_MPAGES 0xff static const u8 def_rw_recovery_mpage[RW_RECOVERY_MPAGE_LEN] = { RW_RECOVERY_MPAGE, RW_RECOVERY_MPAGE_LEN - 2, (1 << 7), /* AWRE */ 0, /* read retry count */ 0, 0, 0, 0, 0, /* write retry count */ 0, 0, 0 }; static const u8 def_cache_mpage[CACHE_MPAGE_LEN] = { CACHE_MPAGE, CACHE_MPAGE_LEN - 2, 0, /* contains WCE, needs to be 0 for logic */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* contains DRA, needs to be 0 for logic */ 0, 0, 0, 0, 0, 0, 0 }; static const u8 def_control_mpage[CONTROL_MPAGE_LEN] = { CONTROL_MPAGE, CONTROL_MPAGE_LEN - 2, 2, /* DSENSE=0, GLTSD=1 */ 0, /* [QAM+QERR may be 1, see 05-359r1] */ 0, 0, 0, 0, 0xff, 0xff, 0, 30 /* extended self test time, see 05-359r1 */ }; static ssize_t ata_scsi_park_show(struct device *device, struct device_attribute *attr, char *buf) { struct scsi_device *sdev = to_scsi_device(device); struct ata_port *ap; struct ata_link *link; struct ata_device *dev; unsigned long now; unsigned int msecs; int rc = 0; ap = ata_shost_to_port(sdev->host); spin_lock_irq(ap->lock); dev = ata_scsi_find_dev(ap, sdev); if (!dev) { rc = -ENODEV; goto unlock; } if (dev->flags & ATA_DFLAG_NO_UNLOAD) { rc = -EOPNOTSUPP; goto unlock; } link = dev->link; now = jiffies; if (ap->pflags & ATA_PFLAG_EH_IN_PROGRESS && link->eh_context.unloaded_mask & (1 << dev->devno) && time_after(dev->unpark_deadline, now)) msecs = jiffies_to_msecs(dev->unpark_deadline - now); else msecs = 0; unlock: spin_unlock_irq(ap->lock); return rc ? rc : snprintf(buf, 20, "%u\n", msecs); } static ssize_t ata_scsi_park_store(struct device *device, struct device_attribute *attr, const char *buf, size_t len) { struct scsi_device *sdev = to_scsi_device(device); struct ata_port *ap; struct ata_device *dev; long int input; unsigned long flags; int rc; rc = kstrtol(buf, 10, &input); if (rc) return rc; if (input < -2) return -EINVAL; if (input > ATA_TMOUT_MAX_PARK) { rc = -EOVERFLOW; input = ATA_TMOUT_MAX_PARK; } ap = ata_shost_to_port(sdev->host); spin_lock_irqsave(ap->lock, flags); dev = ata_scsi_find_dev(ap, sdev); if (unlikely(!dev)) { rc = -ENODEV; goto unlock; } if (dev->class != ATA_DEV_ATA && dev->class != ATA_DEV_ZAC) { rc = -EOPNOTSUPP; goto unlock; } if (input >= 0) { if (dev->flags & ATA_DFLAG_NO_UNLOAD) { rc = -EOPNOTSUPP; goto unlock; } dev->unpark_deadline = ata_deadline(jiffies, input); dev->link->eh_info.dev_action[dev->devno] |= ATA_EH_PARK; ata_port_schedule_eh(ap); complete(&ap->park_req_pending); } else { switch (input) { case -1: dev->flags &= ~ATA_DFLAG_NO_UNLOAD; break; case -2: dev->flags |= ATA_DFLAG_NO_UNLOAD; break; } } unlock: spin_unlock_irqrestore(ap->lock, flags); return rc ? rc : len; } DEVICE_ATTR(unload_heads, S_IRUGO | S_IWUSR, ata_scsi_park_show, ata_scsi_park_store); EXPORT_SYMBOL_GPL(dev_attr_unload_heads); void ata_scsi_set_sense(struct ata_device *dev, struct scsi_cmnd *cmd, u8 sk, u8 asc, u8 ascq) { bool d_sense = (dev->flags & ATA_DFLAG_D_SENSE); if (!cmd) return; cmd->result = (DRIVER_SENSE << 24) | SAM_STAT_CHECK_CONDITION; scsi_build_sense_buffer(d_sense, cmd->sense_buffer, sk, asc, ascq); } void ata_scsi_set_sense_information(struct ata_device *dev, struct scsi_cmnd *cmd, const struct ata_taskfile *tf) { u64 information; if (!cmd) return; information = ata_tf_read_block(tf, dev); if (information == U64_MAX) return; scsi_set_sense_information(cmd->sense_buffer, SCSI_SENSE_BUFFERSIZE, information); } static void ata_scsi_set_invalid_field(struct ata_device *dev, struct scsi_cmnd *cmd, u16 field, u8 bit) { ata_scsi_set_sense(dev, cmd, ILLEGAL_REQUEST, 0x24, 0x0); /* "Invalid field in CDB" */ scsi_set_sense_field_pointer(cmd->sense_buffer, SCSI_SENSE_BUFFERSIZE, field, bit, 1); } static void ata_scsi_set_invalid_parameter(struct ata_device *dev, struct scsi_cmnd *cmd, u16 field) { /* "Invalid field in parameter list" */ ata_scsi_set_sense(dev, cmd, ILLEGAL_REQUEST, 0x26, 0x0); scsi_set_sense_field_pointer(cmd->sense_buffer, SCSI_SENSE_BUFFERSIZE, field, 0xff, 0); } struct device_attribute *ata_common_sdev_attrs[] = { &dev_attr_unload_heads, NULL }; EXPORT_SYMBOL_GPL(ata_common_sdev_attrs); /** * ata_std_bios_param - generic bios head/sector/cylinder calculator used by sd. * @sdev: SCSI device for which BIOS geometry is to be determined * @bdev: block device associated with @sdev * @capacity: capacity of SCSI device * @geom: location to which geometry will be output * * Generic bios head/sector/cylinder calculator * used by sd. Most BIOSes nowadays expect a XXX/255/16 (CHS) * mapping. Some situations may arise where the disk is not * bootable if this is not used. * * LOCKING: * Defined by the SCSI layer. We don't really care. * * RETURNS: * Zero. */ int ata_std_bios_param(struct scsi_device *sdev, struct block_device *bdev, sector_t capacity, int geom[]) { geom[0] = 255; geom[1] = 63; sector_div(capacity, 255*63); geom[2] = capacity; return 0; } EXPORT_SYMBOL_GPL(ata_std_bios_param); /** * ata_scsi_unlock_native_capacity - unlock native capacity * @sdev: SCSI device to adjust device capacity for * * This function is called if a partition on @sdev extends beyond * the end of the device. It requests EH to unlock HPA. * * LOCKING: * Defined by the SCSI layer. Might sleep. */ void ata_scsi_unlock_native_capacity(struct scsi_device *sdev) { struct ata_port *ap = ata_shost_to_port(sdev->host); struct ata_device *dev; unsigned long flags; spin_lock_irqsave(ap->lock, flags); dev = ata_scsi_find_dev(ap, sdev); if (dev && dev->n_sectors < dev->n_native_sectors) { dev->flags |= ATA_DFLAG_UNLOCK_HPA; dev->link->eh_info.action |= ATA_EH_RESET; ata_port_schedule_eh(ap); } spin_unlock_irqrestore(ap->lock, flags); ata_port_wait_eh(ap); } EXPORT_SYMBOL_GPL(ata_scsi_unlock_native_capacity); /** * ata_get_identity - Handler for HDIO_GET_IDENTITY ioctl * @ap: target port * @sdev: SCSI device to get identify data for * @arg: User buffer area for identify data * * LOCKING: * Defined by the SCSI layer. We don't really care. * * RETURNS: * Zero on success, negative errno on error. */ static int ata_get_identity(struct ata_port *ap, struct scsi_device *sdev, void __user *arg) { struct ata_device *dev = ata_scsi_find_dev(ap, sdev); u16 __user *dst = arg; char buf[40]; if (!dev) return -ENOMSG; if (copy_to_user(dst, dev->id, ATA_ID_WORDS * sizeof(u16))) return -EFAULT; ata_id_string(dev->id, buf, ATA_ID_PROD, ATA_ID_PROD_LEN); if (copy_to_user(dst + ATA_ID_PROD, buf, ATA_ID_PROD_LEN)) return -EFAULT; ata_id_string(dev->id, buf, ATA_ID_FW_REV, ATA_ID_FW_REV_LEN); if (copy_to_user(dst + ATA_ID_FW_REV, buf, ATA_ID_FW_REV_LEN)) return -EFAULT; ata_id_string(dev->id, buf, ATA_ID_SERNO, ATA_ID_SERNO_LEN); if (copy_to_user(dst + ATA_ID_SERNO, buf, ATA_ID_SERNO_LEN)) return -EFAULT; return 0; } /** * ata_cmd_ioctl - Handler for HDIO_DRIVE_CMD ioctl * @scsidev: Device to which we are issuing command * @arg: User provided data for issuing command * * LOCKING: * Defined by the SCSI layer. We don't really care. * * RETURNS: * Zero on success, negative errno on error. */ int ata_cmd_ioctl(struct scsi_device *scsidev, void __user *arg) { int rc = 0; u8 sensebuf[SCSI_SENSE_BUFFERSIZE]; u8 scsi_cmd[MAX_COMMAND_SIZE]; u8 args[4], *argbuf = NULL; int argsize = 0; enum dma_data_direction data_dir; struct scsi_sense_hdr sshdr; int cmd_result; if (arg == NULL) return -EINVAL; if (copy_from_user(args, arg, sizeof(args))) return -EFAULT; memset(sensebuf, 0, sizeof(sensebuf)); memset(scsi_cmd, 0, sizeof(scsi_cmd)); if (args[3]) { argsize = ATA_SECT_SIZE * args[3]; argbuf = kmalloc(argsize, GFP_KERNEL); if (argbuf == NULL) { rc = -ENOMEM; goto error; } scsi_cmd[1] = (4 << 1); /* PIO Data-in */ scsi_cmd[2] = 0x0e; /* no off.line or cc, read from dev, block count in sector count field */ data_dir = DMA_FROM_DEVICE; } else { scsi_cmd[1] = (3 << 1); /* Non-data */ scsi_cmd[2] = 0x20; /* cc but no off.line or data xfer */ data_dir = DMA_NONE; } scsi_cmd[0] = ATA_16; scsi_cmd[4] = args[2]; if (args[0] == ATA_CMD_SMART) { /* hack -- ide driver does this too */ scsi_cmd[6] = args[3]; scsi_cmd[8] = args[1]; scsi_cmd[10] = ATA_SMART_LBAM_PASS; scsi_cmd[12] = ATA_SMART_LBAH_PASS; } else { scsi_cmd[6] = args[1]; } scsi_cmd[14] = args[0]; /* Good values for timeout and retries? Values below from scsi_ioctl_send_command() for default case... */ cmd_result = scsi_execute(scsidev, scsi_cmd, data_dir, argbuf, argsize, sensebuf, &sshdr, (10*HZ), 5, 0, 0, NULL); if (driver_byte(cmd_result) == DRIVER_SENSE) {/* sense data available */ u8 *desc = sensebuf + 8; cmd_result &= ~(0xFF<<24); /* DRIVER_SENSE is not an error */ /* If we set cc then ATA pass-through will cause a * check condition even if no error. Filter that. */ if (cmd_result & SAM_STAT_CHECK_CONDITION) { if (sshdr.sense_key == RECOVERED_ERROR && sshdr.asc == 0 && sshdr.ascq == 0x1d) cmd_result &= ~SAM_STAT_CHECK_CONDITION; } /* Send userspace a few ATA registers (same as drivers/ide) */ if (sensebuf[0] == 0x72 && /* format is "descriptor" */ desc[0] == 0x09) { /* code is "ATA Descriptor" */ args[0] = desc[13]; /* status */ args[1] = desc[3]; /* error */ args[2] = desc[5]; /* sector count (0:7) */ if (copy_to_user(arg, args, sizeof(args))) rc = -EFAULT; } } if (cmd_result) { rc = -EIO; goto error; } if ((argbuf) && copy_to_user(arg + sizeof(args), argbuf, argsize)) rc = -EFAULT; error: kfree(argbuf); return rc; } /** * ata_task_ioctl - Handler for HDIO_DRIVE_TASK ioctl * @scsidev: Device to which we are issuing command * @arg: User provided data for issuing command * * LOCKING: * Defined by the SCSI layer. We don't really care. * * RETURNS: * Zero on success, negative errno on error. */ int ata_task_ioctl(struct scsi_device *scsidev, void __user *arg) { int rc = 0; u8 sensebuf[SCSI_SENSE_BUFFERSIZE]; u8 scsi_cmd[MAX_COMMAND_SIZE]; u8 args[7]; struct scsi_sense_hdr sshdr; int cmd_result; if (arg == NULL) return -EINVAL; if (copy_from_user(args, arg, sizeof(args))) return -EFAULT; memset(sensebuf, 0, sizeof(sensebuf)); memset(scsi_cmd, 0, sizeof(scsi_cmd)); scsi_cmd[0] = ATA_16; scsi_cmd[1] = (3 << 1); /* Non-data */ scsi_cmd[2] = 0x20; /* cc but no off.line or data xfer */ scsi_cmd[4] = args[1]; scsi_cmd[6] = args[2]; scsi_cmd[8] = args[3]; scsi_cmd[10] = args[4]; scsi_cmd[12] = args[5]; scsi_cmd[13] = args[6] & 0x4f; scsi_cmd[14] = args[0]; /* Good values for timeout and retries? Values below from scsi_ioctl_send_command() for default case... */ cmd_result = scsi_execute(scsidev, scsi_cmd, DMA_NONE, NULL, 0, sensebuf, &sshdr, (10*HZ), 5, 0, 0, NULL); if (driver_byte(cmd_result) == DRIVER_SENSE) {/* sense data available */ u8 *desc = sensebuf + 8; cmd_result &= ~(0xFF<<24); /* DRIVER_SENSE is not an error */ /* If we set cc then ATA pass-through will cause a * check condition even if no error. Filter that. */ if (cmd_result & SAM_STAT_CHECK_CONDITION) { if (sshdr.sense_key == RECOVERED_ERROR && sshdr.asc == 0 && sshdr.ascq == 0x1d) cmd_result &= ~SAM_STAT_CHECK_CONDITION; } /* Send userspace ATA registers */ if (sensebuf[0] == 0x72 && /* format is "descriptor" */ desc[0] == 0x09) {/* code is "ATA Descriptor" */ args[0] = desc[13]; /* status */ args[1] = desc[3]; /* error */ args[2] = desc[5]; /* sector count (0:7) */ args[3] = desc[7]; /* lbal */ args[4] = desc[9]; /* lbam */ args[5] = desc[11]; /* lbah */ args[6] = desc[12]; /* select */ if (copy_to_user(arg, args, sizeof(args))) rc = -EFAULT; } } if (cmd_result) { rc = -EIO; goto error; } error: return rc; } static int ata_ioc32(struct ata_port *ap) { if (ap->flags & ATA_FLAG_PIO_DMA) return 1; if (ap->pflags & ATA_PFLAG_PIO32) return 1; return 0; } /* * This handles both native and compat commands, so anything added * here must have a compatible argument, or check in_compat_syscall() */ int ata_sas_scsi_ioctl(struct ata_port *ap, struct scsi_device *scsidev, unsigned int cmd, void __user *arg) { unsigned long val; int rc = -EINVAL; unsigned long flags; switch (cmd) { case HDIO_GET_32BIT: spin_lock_irqsave(ap->lock, flags); val = ata_ioc32(ap); spin_unlock_irqrestore(ap->lock, flags); #ifdef CONFIG_COMPAT if (in_compat_syscall()) return put_user(val, (compat_ulong_t __user *)arg); #endif return put_user(val, (unsigned long __user *)arg); case HDIO_SET_32BIT: val = (unsigned long) arg; rc = 0; spin_lock_irqsave(ap->lock, flags); if (ap->pflags & ATA_PFLAG_PIO32CHANGE) { if (val) ap->pflags |= ATA_PFLAG_PIO32; else ap->pflags &= ~ATA_PFLAG_PIO32; } else { if (val != ata_ioc32(ap)) rc = -EINVAL; } spin_unlock_irqrestore(ap->lock, flags); return rc; case HDIO_GET_IDENTITY: return ata_get_identity(ap, scsidev, arg); case HDIO_DRIVE_CMD: if (!capable(CAP_SYS_ADMIN) || !capable(CAP_SYS_RAWIO)) return -EACCES; return ata_cmd_ioctl(scsidev, arg); case HDIO_DRIVE_TASK: if (!capable(CAP_SYS_ADMIN) || !capable(CAP_SYS_RAWIO)) return -EACCES; return ata_task_ioctl(scsidev, arg); default: rc = -ENOTTY; break; } return rc; } EXPORT_SYMBOL_GPL(ata_sas_scsi_ioctl); int ata_scsi_ioctl(struct scsi_device *scsidev, unsigned int cmd, void __user *arg) { return ata_sas_scsi_ioctl(ata_shost_to_port(scsidev->host), scsidev, cmd, arg); } EXPORT_SYMBOL_GPL(ata_scsi_ioctl); /** * ata_scsi_qc_new - acquire new ata_queued_cmd reference * @dev: ATA device to which the new command is attached * @cmd: SCSI command that originated this ATA command * * Obtain a reference to an unused ata_queued_cmd structure, * which is the basic libata structure representing a single * ATA command sent to the hardware. * * If a command was available, fill in the SCSI-specific * portions of the structure with information on the * current command. * * LOCKING: * spin_lock_irqsave(host lock) * * RETURNS: * Command allocated, or %NULL if none available. */ static struct ata_queued_cmd *ata_scsi_qc_new(struct ata_device *dev, struct scsi_cmnd *cmd) { struct ata_queued_cmd *qc; qc = ata_qc_new_init(dev, cmd->request->tag); if (qc) { qc->scsicmd = cmd; qc->scsidone = cmd->scsi_done; qc->sg = scsi_sglist(cmd); qc->n_elem = scsi_sg_count(cmd); if (cmd->request->rq_flags & RQF_QUIET) qc->flags |= ATA_QCFLAG_QUIET; } else { cmd->result = (DID_OK << 16) | (QUEUE_FULL << 1); cmd->scsi_done(cmd); } return qc; } static void ata_qc_set_pc_nbytes(struct ata_queued_cmd *qc) { struct scsi_cmnd *scmd = qc->scsicmd; qc->extrabytes = scmd->extra_len; qc->nbytes = scsi_bufflen(scmd) + qc->extrabytes; } /** * ata_dump_status - user friendly display of error info * @id: id of the port in question * @tf: ptr to filled out taskfile * * Decode and dump the ATA error/status registers for the user so * that they have some idea what really happened at the non * make-believe layer. * * LOCKING: * inherited from caller */ static void ata_dump_status(unsigned id, struct ata_taskfile *tf) { u8 stat = tf->command, err = tf->feature; pr_warn("ata%u: status=0x%02x { ", id, stat); if (stat & ATA_BUSY) { pr_cont("Busy }\n"); /* Data is not valid in this case */ } else { if (stat & ATA_DRDY) pr_cont("DriveReady "); if (stat & ATA_DF) pr_cont("DeviceFault "); if (stat & ATA_DSC) pr_cont("SeekComplete "); if (stat & ATA_DRQ) pr_cont("DataRequest "); if (stat & ATA_CORR) pr_cont("CorrectedError "); if (stat & ATA_SENSE) pr_cont("Sense "); if (stat & ATA_ERR) pr_cont("Error "); pr_cont("}\n"); if (err) { pr_warn("ata%u: error=0x%02x { ", id, err); if (err & ATA_ABORTED) pr_cont("DriveStatusError "); if (err & ATA_ICRC) { if (err & ATA_ABORTED) pr_cont("BadCRC "); else pr_cont("Sector "); } if (err & ATA_UNC) pr_cont("UncorrectableError "); if (err & ATA_IDNF) pr_cont("SectorIdNotFound "); if (err & ATA_TRK0NF) pr_cont("TrackZeroNotFound "); if (err & ATA_AMNF) pr_cont("AddrMarkNotFound "); pr_cont("}\n"); } } } /** * ata_to_sense_error - convert ATA error to SCSI error * @id: ATA device number * @drv_stat: value contained in ATA status register * @drv_err: value contained in ATA error register * @sk: the sense key we'll fill out * @asc: the additional sense code we'll fill out * @ascq: the additional sense code qualifier we'll fill out * @verbose: be verbose * * Converts an ATA error into a SCSI error. Fill out pointers to * SK, ASC, and ASCQ bytes for later use in fixed or descriptor * format sense blocks. * * LOCKING: * spin_lock_irqsave(host lock) */ static void ata_to_sense_error(unsigned id, u8 drv_stat, u8 drv_err, u8 *sk, u8 *asc, u8 *ascq, int verbose) { int i; /* Based on the 3ware driver translation table */ static const unsigned char sense_table[][4] = { /* BBD|ECC|ID|MAR */ {0xd1, ABORTED_COMMAND, 0x00, 0x00}, // Device busy Aborted command /* BBD|ECC|ID */ {0xd0, ABORTED_COMMAND, 0x00, 0x00}, // Device busy Aborted command /* ECC|MC|MARK */ {0x61, HARDWARE_ERROR, 0x00, 0x00}, // Device fault Hardware error /* ICRC|ABRT */ /* NB: ICRC & !ABRT is BBD */ {0x84, ABORTED_COMMAND, 0x47, 0x00}, // Data CRC error SCSI parity error /* MC|ID|ABRT|TRK0|MARK */ {0x37, NOT_READY, 0x04, 0x00}, // Unit offline Not ready /* MCR|MARK */ {0x09, NOT_READY, 0x04, 0x00}, // Unrecovered disk error Not ready /* Bad address mark */ {0x01, MEDIUM_ERROR, 0x13, 0x00}, // Address mark not found for data field /* TRK0 - Track 0 not found */ {0x02, HARDWARE_ERROR, 0x00, 0x00}, // Hardware error /* Abort: 0x04 is not translated here, see below */ /* Media change request */ {0x08, NOT_READY, 0x04, 0x00}, // FIXME: faking offline /* SRV/IDNF - ID not found */ {0x10, ILLEGAL_REQUEST, 0x21, 0x00}, // Logical address out of range /* MC - Media Changed */ {0x20, UNIT_ATTENTION, 0x28, 0x00}, // Not ready to ready change, medium may have changed /* ECC - Uncorrectable ECC error */ {0x40, MEDIUM_ERROR, 0x11, 0x04}, // Unrecovered read error /* BBD - block marked bad */ {0x80, MEDIUM_ERROR, 0x11, 0x04}, // Block marked bad Medium error, unrecovered read error {0xFF, 0xFF, 0xFF, 0xFF}, // END mark }; static const unsigned char stat_table[][4] = { /* Must be first because BUSY means no other bits valid */ {0x80, ABORTED_COMMAND, 0x47, 0x00}, // Busy, fake parity for now {0x40, ILLEGAL_REQUEST, 0x21, 0x04}, // Device ready, unaligned write command {0x20, HARDWARE_ERROR, 0x44, 0x00}, // Device fault, internal target failure {0x08, ABORTED_COMMAND, 0x47, 0x00}, // Timed out in xfer, fake parity for now {0x04, RECOVERED_ERROR, 0x11, 0x00}, // Recovered ECC error Medium error, recovered {0xFF, 0xFF, 0xFF, 0xFF}, // END mark }; /* * Is this an error we can process/parse */ if (drv_stat & ATA_BUSY) { drv_err = 0; /* Ignore the err bits, they're invalid */ } if (drv_err) { /* Look for drv_err */ for (i = 0; sense_table[i][0] != 0xFF; i++) { /* Look for best matches first */ if ((sense_table[i][0] & drv_err) == sense_table[i][0]) { *sk = sense_table[i][1]; *asc = sense_table[i][2]; *ascq = sense_table[i][3]; goto translate_done; } } } /* * Fall back to interpreting status bits. Note that if the drv_err * has only the ABRT bit set, we decode drv_stat. ABRT by itself * is not descriptive enough. */ for (i = 0; stat_table[i][0] != 0xFF; i++) { if (stat_table[i][0] & drv_stat) { *sk = stat_table[i][1]; *asc = stat_table[i][2]; *ascq = stat_table[i][3]; goto translate_done; } } /* * We need a sensible error return here, which is tricky, and one * that won't cause people to do things like return a disk wrongly. */ *sk = ABORTED_COMMAND; *asc = 0x00; *ascq = 0x00; translate_done: if (verbose) pr_err("ata%u: translated ATA stat/err 0x%02x/%02x to SCSI SK/ASC/ASCQ 0x%x/%02x/%02x\n", id, drv_stat, drv_err, *sk, *asc, *ascq); return; } /* * ata_gen_passthru_sense - Generate check condition sense block. * @qc: Command that completed. * * This function is specific to the ATA descriptor format sense * block specified for the ATA pass through commands. Regardless * of whether the command errored or not, return a sense * block. Copy all controller registers into the sense * block. If there was no error, we get the request from an ATA * passthrough command, so we use the following sense data: * sk = RECOVERED ERROR * asc,ascq = ATA PASS-THROUGH INFORMATION AVAILABLE * * * LOCKING: * None. */ static void ata_gen_passthru_sense(struct ata_queued_cmd *qc) { struct scsi_cmnd *cmd = qc->scsicmd; struct ata_taskfile *tf = &qc->result_tf; unsigned char *sb = cmd->sense_buffer; unsigned char *desc = sb + 8; int verbose = qc->ap->ops->error_handler == NULL; u8 sense_key, asc, ascq; memset(sb, 0, SCSI_SENSE_BUFFERSIZE); cmd->result = (DRIVER_SENSE << 24) | SAM_STAT_CHECK_CONDITION; /* * Use ata_to_sense_error() to map status register bits * onto sense key, asc & ascq. */ if (qc->err_mask || tf->command & (ATA_BUSY | ATA_DF | ATA_ERR | ATA_DRQ)) { ata_to_sense_error(qc->ap->print_id, tf->command, tf->feature, &sense_key, &asc, &ascq, verbose); ata_scsi_set_sense(qc->dev, cmd, sense_key, asc, ascq); } else { /* * ATA PASS-THROUGH INFORMATION AVAILABLE * Always in descriptor format sense. */ scsi_build_sense_buffer(1, cmd->sense_buffer, RECOVERED_ERROR, 0, 0x1D); } if ((cmd->sense_buffer[0] & 0x7f) >= 0x72) { u8 len; /* descriptor format */ len = sb[7]; desc = (char *)scsi_sense_desc_find(sb, len + 8, 9); if (!desc) { if (SCSI_SENSE_BUFFERSIZE < len + 14) return; sb[7] = len + 14; desc = sb + 8 + len; } desc[0] = 9; desc[1] = 12; /* * Copy registers into sense buffer. */ desc[2] = 0x00; desc[3] = tf->feature; /* == error reg */ desc[5] = tf->nsect; desc[7] = tf->lbal; desc[9] = tf->lbam; desc[11] = tf->lbah; desc[12] = tf->device; desc[13] = tf->command; /* == status reg */ /* * Fill in Extend bit, and the high order bytes * if applicable. */ if (tf->flags & ATA_TFLAG_LBA48) { desc[2] |= 0x01; desc[4] = tf->hob_nsect; desc[6] = tf->hob_lbal; desc[8] = tf->hob_lbam; desc[10] = tf->hob_lbah; } } else { /* Fixed sense format */ desc[0] = tf->feature; desc[1] = tf->command; /* status */ desc[2] = tf->device; desc[3] = tf->nsect; desc[7] = 0; if (tf->flags & ATA_TFLAG_LBA48) { desc[8] |= 0x80; if (tf->hob_nsect) desc[8] |= 0x40; if (tf->hob_lbal || tf->hob_lbam || tf->hob_lbah) desc[8] |= 0x20; } desc[9] = tf->lbal; desc[10] = tf->lbam; desc[11] = tf->lbah; } } /** * ata_gen_ata_sense - generate a SCSI fixed sense block * @qc: Command that we are erroring out * * Generate sense block for a failed ATA command @qc. Descriptor * format is used to accommodate LBA48 block address. * * LOCKING: * None. */ static void ata_gen_ata_sense(struct ata_queued_cmd *qc) { struct ata_device *dev = qc->dev; struct scsi_cmnd *cmd = qc->scsicmd; struct ata_taskfile *tf = &qc->result_tf; unsigned char *sb = cmd->sense_buffer; int verbose = qc->ap->ops->error_handler == NULL; u64 block; u8 sense_key, asc, ascq; memset(sb, 0, SCSI_SENSE_BUFFERSIZE); cmd->result = (DRIVER_SENSE << 24) | SAM_STAT_CHECK_CONDITION; if (ata_dev_disabled(dev)) { /* Device disabled after error recovery */ /* LOGICAL UNIT NOT READY, HARD RESET REQUIRED */ ata_scsi_set_sense(dev, cmd, NOT_READY, 0x04, 0x21); return; } /* Use ata_to_sense_error() to map status register bits * onto sense key, asc & ascq. */ if (qc->err_mask || tf->command & (ATA_BUSY | ATA_DF | ATA_ERR | ATA_DRQ)) { ata_to_sense_error(qc->ap->print_id, tf->command, tf->feature, &sense_key, &asc, &ascq, verbose); ata_scsi_set_sense(dev, cmd, sense_key, asc, ascq); } else { /* Could not decode error */ ata_dev_warn(dev, "could not decode error status 0x%x err_mask 0x%x\n", tf->command, qc->err_mask); ata_scsi_set_sense(dev, cmd, ABORTED_COMMAND, 0, 0); return; } block = ata_tf_read_block(&qc->result_tf, dev); if (block == U64_MAX) return; scsi_set_sense_information(sb, SCSI_SENSE_BUFFERSIZE, block); } void ata_scsi_sdev_config(struct scsi_device *sdev) { sdev->use_10_for_rw = 1; sdev->use_10_for_ms = 1; sdev->no_write_same = 1; /* Schedule policy is determined by ->qc_defer() callback and * it needs to see every deferred qc. Set dev_blocked to 1 to * prevent SCSI midlayer from automatically deferring * requests. */ sdev->max_device_blocked = 1; } /** * ata_scsi_dma_need_drain - Check whether data transfer may overflow * @rq: request to be checked * * ATAPI commands which transfer variable length data to host * might overflow due to application error or hardware bug. This * function checks whether overflow should be drained and ignored * for @request. * * LOCKING: * None. * * RETURNS: * 1 if ; otherwise, 0. */ bool ata_scsi_dma_need_drain(struct request *rq) { return atapi_cmd_type(scsi_req(rq)->cmd[0]) == ATAPI_MISC; } EXPORT_SYMBOL_GPL(ata_scsi_dma_need_drain); int ata_scsi_dev_config(struct scsi_device *sdev, struct ata_device *dev) { struct request_queue *q = sdev->request_queue; if (!ata_id_has_unload(dev->id)) dev->flags |= ATA_DFLAG_NO_UNLOAD; /* configure max sectors */ blk_queue_max_hw_sectors(q, dev->max_sectors); if (dev->class == ATA_DEV_ATAPI) { sdev->sector_size = ATA_SECT_SIZE; /* set DMA padding */ blk_queue_update_dma_pad(q, ATA_DMA_PAD_SZ - 1); /* make room for appending the drain */ blk_queue_max_segments(q, queue_max_segments(q) - 1); sdev->dma_drain_len = ATAPI_MAX_DRAIN; sdev->dma_drain_buf = kmalloc(sdev->dma_drain_len, q->bounce_gfp | GFP_KERNEL); if (!sdev->dma_drain_buf) { ata_dev_err(dev, "drain buffer allocation failed\n"); return -ENOMEM; } } else { sdev->sector_size = ata_id_logical_sector_size(dev->id); sdev->manage_start_stop = 1; } /* * ata_pio_sectors() expects buffer for each sector to not cross * page boundary. Enforce it by requiring buffers to be sector * aligned, which works iff sector_size is not larger than * PAGE_SIZE. ATAPI devices also need the alignment as * IDENTIFY_PACKET is executed as ATA_PROT_PIO. */ if (sdev->sector_size > PAGE_SIZE) ata_dev_warn(dev, "sector_size=%u > PAGE_SIZE, PIO may malfunction\n", sdev->sector_size); blk_queue_update_dma_alignment(q, sdev->sector_size - 1); if (dev->flags & ATA_DFLAG_AN) set_bit(SDEV_EVT_MEDIA_CHANGE, sdev->supported_events); if (dev->flags & ATA_DFLAG_NCQ) { int depth; depth = min(sdev->host->can_queue, ata_id_queue_depth(dev->id)); depth = min(ATA_MAX_QUEUE, depth); scsi_change_queue_depth(sdev, depth); } if (dev->flags & ATA_DFLAG_TRUSTED) sdev->security_supported = 1; dev->sdev = sdev; return 0; } /** * ata_scsi_slave_config - Set SCSI device attributes * @sdev: SCSI device to examine * * This is called before we actually start reading * and writing to the device, to configure certain * SCSI mid-layer behaviors. * * LOCKING: * Defined by SCSI layer. We don't really care. */ int ata_scsi_slave_config(struct scsi_device *sdev) { struct ata_port *ap = ata_shost_to_port(sdev->host); struct ata_device *dev = __ata_scsi_find_dev(ap, sdev); int rc = 0; ata_scsi_sdev_config(sdev); if (dev) rc = ata_scsi_dev_config(sdev, dev); return rc; } EXPORT_SYMBOL_GPL(ata_scsi_slave_config); /** * ata_scsi_slave_destroy - SCSI device is about to be destroyed * @sdev: SCSI device to be destroyed * * @sdev is about to be destroyed for hot/warm unplugging. If * this unplugging was initiated by libata as indicated by NULL * dev->sdev, this function doesn't have to do anything. * Otherwise, SCSI layer initiated warm-unplug is in progress. * Clear dev->sdev, schedule the device for ATA detach and invoke * EH. * * LOCKING: * Defined by SCSI layer. We don't really care. */ void ata_scsi_slave_destroy(struct scsi_device *sdev) { struct ata_port *ap = ata_shost_to_port(sdev->host); unsigned long flags; struct ata_device *dev; if (!ap->ops->error_handler) return; spin_lock_irqsave(ap->lock, flags); dev = __ata_scsi_find_dev(ap, sdev); if (dev && dev->sdev) { /* SCSI device already in CANCEL state, no need to offline it */ dev->sdev = NULL; dev->flags |= ATA_DFLAG_DETACH; ata_port_schedule_eh(ap); } spin_unlock_irqrestore(ap->lock, flags); kfree(sdev->dma_drain_buf); } EXPORT_SYMBOL_GPL(ata_scsi_slave_destroy); /** * ata_scsi_start_stop_xlat - Translate SCSI START STOP UNIT command * @qc: Storage for translated ATA taskfile * * Sets up an ATA taskfile to issue STANDBY (to stop) or READ VERIFY * (to start). Perhaps these commands should be preceded by * CHECK POWER MODE to see what power mode the device is already in. * [See SAT revision 5 at www.t10.org] * * LOCKING: * spin_lock_irqsave(host lock) * * RETURNS: * Zero on success, non-zero on error. */ static unsigned int ata_scsi_start_stop_xlat(struct ata_queued_cmd *qc) { struct scsi_cmnd *scmd = qc->scsicmd; struct ata_taskfile *tf = &qc->tf; const u8 *cdb = scmd->cmnd; u16 fp; u8 bp = 0xff; if (scmd->cmd_len < 5) { fp = 4; goto invalid_fld; } tf->flags |= ATA_TFLAG_DEVICE | ATA_TFLAG_ISADDR; tf->protocol = ATA_PROT_NODATA; if (cdb[1] & 0x1) { ; /* ignore IMMED bit, violates sat-r05 */ } if (cdb[4] & 0x2) { fp = 4; bp = 1; goto invalid_fld; /* LOEJ bit set not supported */ } if (((cdb[4] >> 4) & 0xf) != 0) { fp = 4; bp = 3; goto invalid_fld; /* power conditions not supported */ } if (cdb[4] & 0x1) { tf->nsect = 1; /* 1 sector, lba=0 */ if (qc->dev->flags & ATA_DFLAG_LBA) { tf->flags |= ATA_TFLAG_LBA; tf->lbah = 0x0; tf->lbam = 0x0; tf->lbal = 0x0; tf->device |= ATA_LBA; } else { /* CHS */ tf->lbal = 0x1; /* sect */ tf->lbam = 0x0; /* cyl low */ tf->lbah = 0x0; /* cyl high */ } tf->command = ATA_CMD_VERIFY; /* READ VERIFY */ } else { /* Some odd clown BIOSen issue spindown on power off (ACPI S4 * or S5) causing some drives to spin up and down again. */ if ((qc->ap->flags & ATA_FLAG_NO_POWEROFF_SPINDOWN) && system_state == SYSTEM_POWER_OFF) goto skip; if ((qc->ap->flags & ATA_FLAG_NO_HIBERNATE_SPINDOWN) && system_entering_hibernation()) goto skip; /* Issue ATA STANDBY IMMEDIATE command */ tf->command = ATA_CMD_STANDBYNOW1; } /* * Standby and Idle condition timers could be implemented but that * would require libata to implement the Power condition mode page * and allow the user to change it. Changing mode pages requires * MODE SELECT to be implemented. */ return 0; invalid_fld: ata_scsi_set_invalid_field(qc->dev, scmd, fp, bp); return 1; skip: scmd->result = SAM_STAT_GOOD; return 1; } /** * ata_scsi_flush_xlat - Translate SCSI SYNCHRONIZE CACHE command * @qc: Storage for translated ATA taskfile * * Sets up an ATA taskfile to issue FLUSH CACHE or * FLUSH CACHE EXT. * * LOCKING: * spin_lock_irqsave(host lock) * * RETURNS: * Zero on success, non-zero on error. */ static unsigned int ata_scsi_flush_xlat(struct ata_queued_cmd *qc) { struct ata_taskfile *tf = &qc->tf; tf->flags |= ATA_TFLAG_DEVICE; tf->protocol = ATA_PROT_NODATA; if (qc->dev->flags & ATA_DFLAG_FLUSH_EXT) tf->command = ATA_CMD_FLUSH_EXT; else tf->command = ATA_CMD_FLUSH; /* flush is critical for IO integrity, consider it an IO command */ qc->flags |= ATA_QCFLAG_IO; return 0; } /** * scsi_6_lba_len - Get LBA and transfer length * @cdb: SCSI command to translate * * Calculate LBA and transfer length for 6-byte commands. * * RETURNS: * @plba: the LBA * @plen: the transfer length */ static void scsi_6_lba_len(const u8 *cdb, u64 *plba, u32 *plen) { u64 lba = 0; u32 len; VPRINTK("six-byte command\n"); lba |= ((u64)(cdb[1] & 0x1f)) << 16; lba |= ((u64)cdb[2]) << 8; lba |= ((u64)cdb[3]); len = cdb[4]; *plba = lba; *plen = len; } /** * scsi_10_lba_len - Get LBA and transfer length * @cdb: SCSI command to translate * * Calculate LBA and transfer length for 10-byte commands. * * RETURNS: * @plba: the LBA * @plen: the transfer length */ static void scsi_10_lba_len(const u8 *cdb, u64 *plba, u32 *plen) { u64 lba = 0; u32 len = 0; VPRINTK("ten-byte command\n"); lba |= ((u64)cdb[2]) << 24; lba |= ((u64)cdb[3]) << 16; lba |= ((u64)cdb[4]) << 8; lba |= ((u64)cdb[5]); len |= ((u32)cdb[7]) << 8; len |= ((u32)cdb[8]); *plba = lba; *plen = len; } /** * scsi_16_lba_len - Get LBA and transfer length * @cdb: SCSI command to translate * * Calculate LBA and transfer length for 16-byte commands. * * RETURNS: * @plba: the LBA * @plen: the transfer length */ static void scsi_16_lba_len(const u8 *cdb, u64 *plba, u32 *plen) { u64 lba = 0; u32 len = 0; VPRINTK("sixteen-byte command\n"); lba |= ((u64)cdb[2]) << 56; lba |= ((u64)cdb[3]) << 48; lba |= ((u64)cdb[4]) << 40; lba |= ((u64)cdb[5]) << 32; lba |= ((u64)cdb[6]) << 24; lba |= ((u64)cdb[7]) << 16; lba |= ((u64)cdb[8]) << 8; lba |= ((u64)cdb[9]); len |= ((u32)cdb[10]) << 24; len |= ((u32)cdb[11]) << 16; len |= ((u32)cdb[12]) << 8; len |= ((u32)cdb[13]); *plba = lba; *plen = len; } /** * ata_scsi_verify_xlat - Translate SCSI VERIFY command into an ATA one * @qc: Storage for translated ATA taskfile * * Converts SCSI VERIFY command to an ATA READ VERIFY command. * * LOCKING: * spin_lock_irqsave(host lock) * * RETURNS: * Zero on success, non-zero on error. */ static unsigned int ata_scsi_verify_xlat(struct ata_queued_cmd *qc) { struct scsi_cmnd *scmd = qc->scsicmd; struct ata_taskfile *tf = &qc->tf; struct ata_device *dev = qc->dev; u64 dev_sectors = qc->dev->n_sectors; const u8 *cdb = scmd->cmnd; u64 block; u32 n_block; u16 fp; tf->flags |= ATA_TFLAG_ISADDR | ATA_TFLAG_DEVICE; tf->protocol = ATA_PROT_NODATA; if (cdb[0] == VERIFY) { if (scmd->cmd_len < 10) { fp = 9; goto invalid_fld; } scsi_10_lba_len(cdb, &block, &n_block); } else if (cdb[0] == VERIFY_16) { if (scmd->cmd_len < 16) { fp = 15; goto invalid_fld; } scsi_16_lba_len(cdb, &block, &n_block); } else { fp = 0; goto invalid_fld; } if (!n_block) goto nothing_to_do; if (block >= dev_sectors) goto out_of_range; if ((block + n_block) > dev_sectors) goto out_of_range; if (dev->flags & ATA_DFLAG_LBA) { tf->flags |= ATA_TFLAG_LBA; if (lba_28_ok(block, n_block)) { /* use LBA28 */ tf->command = ATA_CMD_VERIFY; tf->device |= (block >> 24) & 0xf; } else if (lba_48_ok(block, n_block)) { if (!(dev->flags & ATA_DFLAG_LBA48)) goto out_of_range; /* use LBA48 */ tf->flags |= ATA_TFLAG_LBA48; tf->command = ATA_CMD_VERIFY_EXT; tf->hob_nsect = (n_block >> 8) & 0xff; tf->hob_lbah = (block >> 40) & 0xff; tf->hob_lbam = (block >> 32) & 0xff; tf->hob_lbal = (block >> 24) & 0xff; } else /* request too large even for LBA48 */ goto out_of_range; tf->nsect = n_block & 0xff; tf->lbah = (block >> 16) & 0xff; tf->lbam = (block >> 8) & 0xff; tf->lbal = block & 0xff; tf->device |= ATA_LBA; } else { /* CHS */ u32 sect, head, cyl, track; if (!lba_28_ok(block, n_block)) goto out_of_range; /* Convert LBA to CHS */ track = (u32)block / dev->sectors; cyl = track / dev->heads; head = track % dev->heads; sect = (u32)block % dev->sectors + 1; DPRINTK("block %u track %u cyl %u head %u sect %u\n", (u32)block, track, cyl, head, sect); /* Check whether the converted CHS can fit. Cylinder: 0-65535 Head: 0-15 Sector: 1-255*/ if ((cyl >> 16) || (head >> 4) || (sect >> 8) || (!sect)) goto out_of_range; tf->command = ATA_CMD_VERIFY; tf->nsect = n_block & 0xff; /* Sector count 0 means 256 sectors */ tf->lbal = sect; tf->lbam = cyl; tf->lbah = cyl >> 8; tf->device |= head; } return 0; invalid_fld: ata_scsi_set_invalid_field(qc->dev, scmd, fp, 0xff); return 1; out_of_range: ata_scsi_set_sense(qc->dev, scmd, ILLEGAL_REQUEST, 0x21, 0x0); /* "Logical Block Address out of range" */ return 1; nothing_to_do: scmd->result = SAM_STAT_GOOD; return 1; } static bool ata_check_nblocks(struct scsi_cmnd *scmd, u32 n_blocks) { struct request *rq = scmd->request; u32 req_blocks; if (!blk_rq_is_passthrough(rq)) return true; req_blocks = blk_rq_bytes(rq) / scmd->device->sector_size; if (n_blocks > req_blocks) return false; return true; } /** * ata_scsi_rw_xlat - Translate SCSI r/w command into an ATA one * @qc: Storage for translated ATA taskfile * * Converts any of six SCSI read/write commands into the * ATA counterpart, including starting sector (LBA), * sector count, and taking into account the device's LBA48 * support. * * Commands %READ_6, %READ_10, %READ_16, %WRITE_6, %WRITE_10, and * %WRITE_16 are currently supported. * * LOCKING: * spin_lock_irqsave(host lock) * * RETURNS: * Zero on success, non-zero on error. */ static unsigned int ata_scsi_rw_xlat(struct ata_queued_cmd *qc) { struct scsi_cmnd *scmd = qc->scsicmd; const u8 *cdb = scmd->cmnd; struct request *rq = scmd->request; int class = IOPRIO_PRIO_CLASS(req_get_ioprio(rq)); unsigned int tf_flags = 0; u64 block; u32 n_block; int rc; u16 fp = 0; if (cdb[0] == WRITE_10 || cdb[0] == WRITE_6 || cdb[0] == WRITE_16) tf_flags |= ATA_TFLAG_WRITE; /* Calculate the SCSI LBA, transfer length and FUA. */ switch (cdb[0]) { case READ_10: case WRITE_10: if (unlikely(scmd->cmd_len < 10)) { fp = 9; goto invalid_fld; } scsi_10_lba_len(cdb, &block, &n_block); if (cdb[1] & (1 << 3)) tf_flags |= ATA_TFLAG_FUA; if (!ata_check_nblocks(scmd, n_block)) goto invalid_fld; break; case READ_6: case WRITE_6: if (unlikely(scmd->cmd_len < 6)) { fp = 5; goto invalid_fld; } scsi_6_lba_len(cdb, &block, &n_block); /* for 6-byte r/w commands, transfer length 0 * means 256 blocks of data, not 0 block. */ if (!n_block) n_block = 256; if (!ata_check_nblocks(scmd, n_block)) goto invalid_fld; break; case READ_16: case WRITE_16: if (unlikely(scmd->cmd_len < 16)) { fp = 15; goto invalid_fld; } scsi_16_lba_len(cdb, &block, &n_block); if (cdb[1] & (1 << 3)) tf_flags |= ATA_TFLAG_FUA; if (!ata_check_nblocks(scmd, n_block)) goto invalid_fld; break; default: DPRINTK("no-byte command\n"); fp = 0; goto invalid_fld; } /* Check and compose ATA command */ if (!n_block) /* For 10-byte and 16-byte SCSI R/W commands, transfer * length 0 means transfer 0 block of data. * However, for ATA R/W commands, sector count 0 means * 256 or 65536 sectors, not 0 sectors as in SCSI. * * WARNING: one or two older ATA drives treat 0 as 0... */ goto nothing_to_do; qc->flags |= ATA_QCFLAG_IO; qc->nbytes = n_block * scmd->device->sector_size; rc = ata_build_rw_tf(&qc->tf, qc->dev, block, n_block, tf_flags, qc->hw_tag, class); if (likely(rc == 0)) return 0; if (rc == -ERANGE) goto out_of_range; /* treat all other errors as -EINVAL, fall through */ invalid_fld: ata_scsi_set_invalid_field(qc->dev, scmd, fp, 0xff); return 1; out_of_range: ata_scsi_set_sense(qc->dev, scmd, ILLEGAL_REQUEST, 0x21, 0x0); /* "Logical Block Address out of range" */ return 1; nothing_to_do: scmd->result = SAM_STAT_GOOD; return 1; } static void ata_qc_done(struct ata_queued_cmd *qc) { struct scsi_cmnd *cmd = qc->scsicmd; void (*done)(struct scsi_cmnd *) = qc->scsidone; ata_qc_free(qc); done(cmd); } static void ata_scsi_qc_complete(struct ata_queued_cmd *qc) { struct ata_port *ap = qc->ap; struct scsi_cmnd *cmd = qc->scsicmd; u8 *cdb = cmd->cmnd; int need_sense = (qc->err_mask != 0); /* For ATA pass thru (SAT) commands, generate a sense block if * user mandated it or if there's an error. Note that if we * generate because the user forced us to [CK_COND =1], a check * condition is generated and the ATA register values are returned * whether the command completed successfully or not. If there * was no error, we use the following sense data: * sk = RECOVERED ERROR * asc,ascq = ATA PASS-THROUGH INFORMATION AVAILABLE */ if (((cdb[0] == ATA_16) || (cdb[0] == ATA_12)) && ((cdb[2] & 0x20) || need_sense)) ata_gen_passthru_sense(qc); else if (qc->flags & ATA_QCFLAG_SENSE_VALID) cmd->result = SAM_STAT_CHECK_CONDITION; else if (need_sense) ata_gen_ata_sense(qc); else cmd->result = SAM_STAT_GOOD; if (need_sense && !ap->ops->error_handler) ata_dump_status(ap->print_id, &qc->result_tf); ata_qc_done(qc); } /** * ata_scsi_translate - Translate then issue SCSI command to ATA device * @dev: ATA device to which the command is addressed * @cmd: SCSI command to execute * @xlat_func: Actor which translates @cmd to an ATA taskfile * * Our ->queuecommand() function has decided that the SCSI * command issued can be directly translated into an ATA * command, rather than handled internally. * * This function sets up an ata_queued_cmd structure for the * SCSI command, and sends that ata_queued_cmd to the hardware. * * The xlat_func argument (actor) returns 0 if ready to execute * ATA command, else 1 to finish translation. If 1 is returned * then cmd->result (and possibly cmd->sense_buffer) are assumed * to be set reflecting an error condition or clean (early) * termination. * * LOCKING: * spin_lock_irqsave(host lock) * * RETURNS: * 0 on success, SCSI_ML_QUEUE_DEVICE_BUSY if the command * needs to be deferred. */ static int ata_scsi_translate(struct ata_device *dev, struct scsi_cmnd *cmd, ata_xlat_func_t xlat_func) { struct ata_port *ap = dev->link->ap; struct ata_queued_cmd *qc; int rc; VPRINTK("ENTER\n"); qc = ata_scsi_qc_new(dev, cmd); if (!qc) goto err_mem; /* data is present; dma-map it */ if (cmd->sc_data_direction == DMA_FROM_DEVICE || cmd->sc_data_direction == DMA_TO_DEVICE) { if (unlikely(scsi_bufflen(cmd) < 1)) { ata_dev_warn(dev, "WARNING: zero len r/w req\n"); goto err_did; } ata_sg_init(qc, scsi_sglist(cmd), scsi_sg_count(cmd)); qc->dma_dir = cmd->sc_data_direction; } qc->complete_fn = ata_scsi_qc_complete; if (xlat_func(qc)) goto early_finish; if (ap->ops->qc_defer) { if ((rc = ap->ops->qc_defer(qc))) goto defer; } /* select device, send command to hardware */ ata_qc_issue(qc); VPRINTK("EXIT\n"); return 0; early_finish: ata_qc_free(qc); cmd->scsi_done(cmd); DPRINTK("EXIT - early finish (good or error)\n"); return 0; err_did: ata_qc_free(qc); cmd->result = (DID_ERROR << 16); cmd->scsi_done(cmd); err_mem: DPRINTK("EXIT - internal\n"); return 0; defer: ata_qc_free(qc); DPRINTK("EXIT - defer\n"); if (rc == ATA_DEFER_LINK) return SCSI_MLQUEUE_DEVICE_BUSY; else return SCSI_MLQUEUE_HOST_BUSY; } struct ata_scsi_args { struct ata_device *dev; u16 *id; struct scsi_cmnd *cmd; }; /** * ata_scsi_rbuf_get - Map response buffer. * @cmd: SCSI command containing buffer to be mapped. * @flags: unsigned long variable to store irq enable status * @copy_in: copy in from user buffer * * Prepare buffer for simulated SCSI commands. * * LOCKING: * spin_lock_irqsave(ata_scsi_rbuf_lock) on success * * RETURNS: * Pointer to response buffer. */ static void *ata_scsi_rbuf_get(struct scsi_cmnd *cmd, bool copy_in, unsigned long *flags) { spin_lock_irqsave(&ata_scsi_rbuf_lock, *flags); memset(ata_scsi_rbuf, 0, ATA_SCSI_RBUF_SIZE); if (copy_in) sg_copy_to_buffer(scsi_sglist(cmd), scsi_sg_count(cmd), ata_scsi_rbuf, ATA_SCSI_RBUF_SIZE); return ata_scsi_rbuf; } /** * ata_scsi_rbuf_put - Unmap response buffer. * @cmd: SCSI command containing buffer to be unmapped. * @copy_out: copy out result * @flags: @flags passed to ata_scsi_rbuf_get() * * Returns rbuf buffer. The result is copied to @cmd's buffer if * @copy_back is true. * * LOCKING: * Unlocks ata_scsi_rbuf_lock. */ static inline void ata_scsi_rbuf_put(struct scsi_cmnd *cmd, bool copy_out, unsigned long *flags) { if (copy_out) sg_copy_from_buffer(scsi_sglist(cmd), scsi_sg_count(cmd), ata_scsi_rbuf, ATA_SCSI_RBUF_SIZE); spin_unlock_irqrestore(&ata_scsi_rbuf_lock, *flags); } /** * ata_scsi_rbuf_fill - wrapper for SCSI command simulators * @args: device IDENTIFY data / SCSI command of interest. * @actor: Callback hook for desired SCSI command simulator * * Takes care of the hard work of simulating a SCSI command... * Mapping the response buffer, calling the command's handler, * and handling the handler's return value. This return value * indicates whether the handler wishes the SCSI command to be * completed successfully (0), or not (in which case cmd->result * and sense buffer are assumed to be set). * * LOCKING: * spin_lock_irqsave(host lock) */ static void ata_scsi_rbuf_fill(struct ata_scsi_args *args, unsigned int (*actor)(struct ata_scsi_args *args, u8 *rbuf)) { u8 *rbuf; unsigned int rc; struct scsi_cmnd *cmd = args->cmd; unsigned long flags; rbuf = ata_scsi_rbuf_get(cmd, false, &flags); rc = actor(args, rbuf); ata_scsi_rbuf_put(cmd, rc == 0, &flags); if (rc == 0) cmd->result = SAM_STAT_GOOD; } /** * ata_scsiop_inq_std - Simulate INQUIRY command * @args: device IDENTIFY data / SCSI command of interest. * @rbuf: Response buffer, to which simulated SCSI cmd output is sent. * * Returns standard device identification data associated * with non-VPD INQUIRY command output. * * LOCKING: * spin_lock_irqsave(host lock) */ static unsigned int ata_scsiop_inq_std(struct ata_scsi_args *args, u8 *rbuf) { static const u8 versions[] = { 0x00, 0x60, /* SAM-3 (no version claimed) */ 0x03, 0x20, /* SBC-2 (no version claimed) */ 0x03, 0x00 /* SPC-3 (no version claimed) */ }; static const u8 versions_zbc[] = { 0x00, 0xA0, /* SAM-5 (no version claimed) */ 0x06, 0x00, /* SBC-4 (no version claimed) */ 0x05, 0xC0, /* SPC-5 (no version claimed) */ 0x60, 0x24, /* ZBC r05 */ }; u8 hdr[] = { TYPE_DISK, 0, 0x5, /* claim SPC-3 version compatibility */ 2, 95 - 4, 0, 0, 2 }; VPRINTK("ENTER\n"); /* set scsi removable (RMB) bit per ata bit, or if the * AHCI port says it's external (Hotplug-capable, eSATA). */ if (ata_id_removable(args->id) || (args->dev->link->ap->pflags & ATA_PFLAG_EXTERNAL)) hdr[1] |= (1 << 7); if (args->dev->class == ATA_DEV_ZAC) { hdr[0] = TYPE_ZBC; hdr[2] = 0x7; /* claim SPC-5 version compatibility */ } memcpy(rbuf, hdr, sizeof(hdr)); memcpy(&rbuf[8], "ATA ", 8); ata_id_string(args->id, &rbuf[16], ATA_ID_PROD, 16); /* From SAT, use last 2 words from fw rev unless they are spaces */ ata_id_string(args->id, &rbuf[32], ATA_ID_FW_REV + 2, 4); if (strncmp(&rbuf[32], " ", 4) == 0) ata_id_string(args->id, &rbuf[32], ATA_ID_FW_REV, 4); if (rbuf[32] == 0 || rbuf[32] == ' ') memcpy(&rbuf[32], "n/a ", 4); if (ata_id_zoned_cap(args->id) || args->dev->class == ATA_DEV_ZAC) memcpy(rbuf + 58, versions_zbc, sizeof(versions_zbc)); else memcpy(rbuf + 58, versions, sizeof(versions)); return 0; } /** * ata_scsiop_inq_00 - Simulate INQUIRY VPD page 0, list of pages * @args: device IDENTIFY data / SCSI command of interest. * @rbuf: Response buffer, to which simulated SCSI cmd output is sent. * * Returns list of inquiry VPD pages available. * * LOCKING: * spin_lock_irqsave(host lock) */ static unsigned int ata_scsiop_inq_00(struct ata_scsi_args *args, u8 *rbuf) { int num_pages; static const u8 pages[] = { 0x00, /* page 0x00, this page */ 0x80, /* page 0x80, unit serial no page */ 0x83, /* page 0x83, device ident page */ 0x89, /* page 0x89, ata info page */ 0xb0, /* page 0xb0, block limits page */ 0xb1, /* page 0xb1, block device characteristics page */ 0xb2, /* page 0xb2, thin provisioning page */ 0xb6, /* page 0xb6, zoned block device characteristics */ }; num_pages = sizeof(pages); if (!(args->dev->flags & ATA_DFLAG_ZAC)) num_pages--; rbuf[3] = num_pages; /* number of supported VPD pages */ memcpy(rbuf + 4, pages, num_pages); return 0; } /** * ata_scsiop_inq_80 - Simulate INQUIRY VPD page 80, device serial number * @args: device IDENTIFY data / SCSI command of interest. * @rbuf: Response buffer, to which simulated SCSI cmd output is sent. * * Returns ATA device serial number. * * LOCKING: * spin_lock_irqsave(host lock) */ static unsigned int ata_scsiop_inq_80(struct ata_scsi_args *args, u8 *rbuf) { static const u8 hdr[] = { 0, 0x80, /* this page code */ 0, ATA_ID_SERNO_LEN, /* page len */ }; memcpy(rbuf, hdr, sizeof(hdr)); ata_id_string(args->id, (unsigned char *) &rbuf[4], ATA_ID_SERNO, ATA_ID_SERNO_LEN); return 0; } /** * ata_scsiop_inq_83 - Simulate INQUIRY VPD page 83, device identity * @args: device IDENTIFY data / SCSI command of interest. * @rbuf: Response buffer, to which simulated SCSI cmd output is sent. * * Yields two logical unit device identification designators: * - vendor specific ASCII containing the ATA serial number * - SAT defined "t10 vendor id based" containing ASCII vendor * name ("ATA "), model and serial numbers. * * LOCKING: * spin_lock_irqsave(host lock) */ static unsigned int ata_scsiop_inq_83(struct ata_scsi_args *args, u8 *rbuf) { const int sat_model_serial_desc_len = 68; int num; rbuf[1] = 0x83; /* this page code */ num = 4; /* piv=0, assoc=lu, code_set=ACSII, designator=vendor */ rbuf[num + 0] = 2; rbuf[num + 3] = ATA_ID_SERNO_LEN; num += 4; ata_id_string(args->id, (unsigned char *) rbuf + num, ATA_ID_SERNO, ATA_ID_SERNO_LEN); num += ATA_ID_SERNO_LEN; /* SAT defined lu model and serial numbers descriptor */ /* piv=0, assoc=lu, code_set=ACSII, designator=t10 vendor id */ rbuf[num + 0] = 2; rbuf[num + 1] = 1; rbuf[num + 3] = sat_model_serial_desc_len; num += 4; memcpy(rbuf + num, "ATA ", 8); num += 8; ata_id_string(args->id, (unsigned char *) rbuf + num, ATA_ID_PROD, ATA_ID_PROD_LEN); num += ATA_ID_PROD_LEN; ata_id_string(args->id, (unsigned char *) rbuf + num, ATA_ID_SERNO, ATA_ID_SERNO_LEN); num += ATA_ID_SERNO_LEN; if (ata_id_has_wwn(args->id)) { /* SAT defined lu world wide name */ /* piv=0, assoc=lu, code_set=binary, designator=NAA */ rbuf[num + 0] = 1; rbuf[num + 1] = 3; rbuf[num + 3] = ATA_ID_WWN_LEN; num += 4; ata_id_string(args->id, (unsigned char *) rbuf + num, ATA_ID_WWN, ATA_ID_WWN_LEN); num += ATA_ID_WWN_LEN; } rbuf[3] = num - 4; /* page len (assume less than 256 bytes) */ return 0; } /** * ata_scsiop_inq_89 - Simulate INQUIRY VPD page 89, ATA info * @args: device IDENTIFY data / SCSI command of interest. * @rbuf: Response buffer, to which simulated SCSI cmd output is sent. * * Yields SAT-specified ATA VPD page. * * LOCKING: * spin_lock_irqsave(host lock) */ static unsigned int ata_scsiop_inq_89(struct ata_scsi_args *args, u8 *rbuf) { rbuf[1] = 0x89; /* our page code */ rbuf[2] = (0x238 >> 8); /* page size fixed at 238h */ rbuf[3] = (0x238 & 0xff); memcpy(&rbuf[8], "linux ", 8); memcpy(&rbuf[16], "libata ", 16); memcpy(&rbuf[32], DRV_VERSION, 4); rbuf[36] = 0x34; /* force D2H Reg FIS (34h) */ rbuf[37] = (1 << 7); /* bit 7 indicates Command FIS */ /* TODO: PMP? */ /* we don't store the ATA device signature, so we fake it */ rbuf[38] = ATA_DRDY; /* really, this is Status reg */ rbuf[40] = 0x1; rbuf[48] = 0x1; rbuf[56] = ATA_CMD_ID_ATA; memcpy(&rbuf[60], &args->id[0], 512); return 0; } static unsigned int ata_scsiop_inq_b0(struct ata_scsi_args *args, u8 *rbuf) { struct ata_device *dev = args->dev; u16 min_io_sectors; rbuf[1] = 0xb0; rbuf[3] = 0x3c; /* required VPD size with unmap support */ /* * Optimal transfer length granularity. * * This is always one physical block, but for disks with a smaller * logical than physical sector size we need to figure out what the * latter is. */ min_io_sectors = 1 << ata_id_log2_per_physical_sector(args->id); put_unaligned_be16(min_io_sectors, &rbuf[6]); /* * Optimal unmap granularity. * * The ATA spec doesn't even know about a granularity or alignment * for the TRIM command. We can leave away most of the unmap related * VPD page entries, but we have specifify a granularity to signal * that we support some form of unmap - in thise case via WRITE SAME * with the unmap bit set. */ if (ata_id_has_trim(args->id)) { u64 max_blocks = 65535 * ATA_MAX_TRIM_RNUM; if (dev->horkage & ATA_HORKAGE_MAX_TRIM_128M) max_blocks = 128 << (20 - SECTOR_SHIFT); put_unaligned_be64(max_blocks, &rbuf[36]); put_unaligned_be32(1, &rbuf[28]); } return 0; } static unsigned int ata_scsiop_inq_b1(struct ata_scsi_args *args, u8 *rbuf) { int form_factor = ata_id_form_factor(args->id); int media_rotation_rate = ata_id_rotation_rate(args->id); u8 zoned = ata_id_zoned_cap(args->id); rbuf[1] = 0xb1; rbuf[3] = 0x3c; rbuf[4] = media_rotation_rate >> 8; rbuf[5] = media_rotation_rate; rbuf[7] = form_factor; if (zoned) rbuf[8] = (zoned << 4); return 0; } static unsigned int ata_scsiop_inq_b2(struct ata_scsi_args *args, u8 *rbuf) { /* SCSI Thin Provisioning VPD page: SBC-3 rev 22 or later */ rbuf[1] = 0xb2; rbuf[3] = 0x4; rbuf[5] = 1 << 6; /* TPWS */ return 0; } static unsigned int ata_scsiop_inq_b6(struct ata_scsi_args *args, u8 *rbuf) { /* * zbc-r05 SCSI Zoned Block device characteristics VPD page */ rbuf[1] = 0xb6; rbuf[3] = 0x3C; /* * URSWRZ bit is only meaningful for host-managed ZAC drives */ if (args->dev->zac_zoned_cap & 1) rbuf[4] |= 1; put_unaligned_be32(args->dev->zac_zones_optimal_open, &rbuf[8]); put_unaligned_be32(args->dev->zac_zones_optimal_nonseq, &rbuf[12]); put_unaligned_be32(args->dev->zac_zones_max_open, &rbuf[16]); return 0; } /** * modecpy - Prepare response for MODE SENSE * @dest: output buffer * @src: data being copied * @n: length of mode page * @changeable: whether changeable parameters are requested * * Generate a generic MODE SENSE page for either current or changeable * parameters. * * LOCKING: * None. */ static void modecpy(u8 *dest, const u8 *src, int n, bool changeable) { if (changeable) { memcpy(dest, src, 2); memset(dest + 2, 0, n - 2); } else { memcpy(dest, src, n); } } /** * ata_msense_caching - Simulate MODE SENSE caching info page * @id: device IDENTIFY data * @buf: output buffer * @changeable: whether changeable parameters are requested * * Generate a caching info page, which conditionally indicates * write caching to the SCSI layer, depending on device * capabilities. * * LOCKING: * None. */ static unsigned int ata_msense_caching(u16 *id, u8 *buf, bool changeable) { modecpy(buf, def_cache_mpage, sizeof(def_cache_mpage), changeable); if (changeable) { buf[2] |= (1 << 2); /* ata_mselect_caching() */ } else { buf[2] |= (ata_id_wcache_enabled(id) << 2); /* write cache enable */ buf[12] |= (!ata_id_rahead_enabled(id) << 5); /* disable read ahead */ } return sizeof(def_cache_mpage); } /** * ata_msense_control - Simulate MODE SENSE control mode page * @dev: ATA device of interest * @buf: output buffer * @changeable: whether changeable parameters are requested * * Generate a generic MODE SENSE control mode page. * * LOCKING: * None. */ static unsigned int ata_msense_control(struct ata_device *dev, u8 *buf, bool changeable) { modecpy(buf, def_control_mpage, sizeof(def_control_mpage), changeable); if (changeable) { buf[2] |= (1 << 2); /* ata_mselect_control() */ } else { bool d_sense = (dev->flags & ATA_DFLAG_D_SENSE); buf[2] |= (d_sense << 2); /* descriptor format sense data */ } return sizeof(def_control_mpage); } /** * ata_msense_rw_recovery - Simulate MODE SENSE r/w error recovery page * @buf: output buffer * @changeable: whether changeable parameters are requested * * Generate a generic MODE SENSE r/w error recovery page. * * LOCKING: * None. */ static unsigned int ata_msense_rw_recovery(u8 *buf, bool changeable) { modecpy(buf, def_rw_recovery_mpage, sizeof(def_rw_recovery_mpage), changeable); return sizeof(def_rw_recovery_mpage); } /* * We can turn this into a real blacklist if it's needed, for now just * blacklist any Maxtor BANC1G10 revision firmware */ static int ata_dev_supports_fua(u16 *id) { unsigned char model[ATA_ID_PROD_LEN + 1], fw[ATA_ID_FW_REV_LEN + 1]; if (!libata_fua) return 0; if (!ata_id_has_fua(id)) return 0; ata_id_c_string(id, model, ATA_ID_PROD, sizeof(model)); ata_id_c_string(id, fw, ATA_ID_FW_REV, sizeof(fw)); if (strcmp(model, "Maxtor")) return 1; if (strcmp(fw, "BANC1G10")) return 1; return 0; /* blacklisted */ } /** * ata_scsiop_mode_sense - Simulate MODE SENSE 6, 10 commands * @args: device IDENTIFY data / SCSI command of interest. * @rbuf: Response buffer, to which simulated SCSI cmd output is sent. * * Simulate MODE SENSE commands. Assume this is invoked for direct * access devices (e.g. disks) only. There should be no block * descriptor for other device types. * * LOCKING: * spin_lock_irqsave(host lock) */ static unsigned int ata_scsiop_mode_sense(struct ata_scsi_args *args, u8 *rbuf) { struct ata_device *dev = args->dev; u8 *scsicmd = args->cmd->cmnd, *p = rbuf; static const u8 sat_blk_desc[] = { 0, 0, 0, 0, /* number of blocks: sat unspecified */ 0, 0, 0x2, 0x0 /* block length: 512 bytes */ }; u8 pg, spg; unsigned int ebd, page_control, six_byte; u8 dpofua, bp = 0xff; u16 fp; VPRINTK("ENTER\n"); six_byte = (scsicmd[0] == MODE_SENSE); ebd = !(scsicmd[1] & 0x8); /* dbd bit inverted == edb */ /* * LLBA bit in msense(10) ignored (compliant) */ page_control = scsicmd[2] >> 6; switch (page_control) { case 0: /* current */ case 1: /* changeable */ case 2: /* defaults */ break; /* supported */ case 3: /* saved */ goto saving_not_supp; default: fp = 2; bp = 6; goto invalid_fld; } if (six_byte) p += 4 + (ebd ? 8 : 0); else p += 8 + (ebd ? 8 : 0); pg = scsicmd[2] & 0x3f; spg = scsicmd[3]; /* * No mode subpages supported (yet) but asking for _all_ * subpages may be valid */ if (spg && (spg != ALL_SUB_MPAGES)) { fp = 3; goto invalid_fld; } switch(pg) { case RW_RECOVERY_MPAGE: p += ata_msense_rw_recovery(p, page_control == 1); break; case CACHE_MPAGE: p += ata_msense_caching(args->id, p, page_control == 1); break; case CONTROL_MPAGE: p += ata_msense_control(args->dev, p, page_control == 1); break; case ALL_MPAGES: p += ata_msense_rw_recovery(p, page_control == 1); p += ata_msense_caching(args->id, p, page_control == 1); p += ata_msense_control(args->dev, p, page_control == 1); break; default: /* invalid page code */ fp = 2; goto invalid_fld; } dpofua = 0; if (ata_dev_supports_fua(args->id) && (dev->flags & ATA_DFLAG_LBA48) && (!(dev->flags & ATA_DFLAG_PIO) || dev->multi_count)) dpofua = 1 << 4; if (six_byte) { rbuf[0] = p - rbuf - 1; rbuf[2] |= dpofua; if (ebd) { rbuf[3] = sizeof(sat_blk_desc); memcpy(rbuf + 4, sat_blk_desc, sizeof(sat_blk_desc)); } } else { unsigned int output_len = p - rbuf - 2; rbuf[0] = output_len >> 8; rbuf[1] = output_len; rbuf[3] |= dpofua; if (ebd) { rbuf[7] = sizeof(sat_blk_desc); memcpy(rbuf + 8, sat_blk_desc, sizeof(sat_blk_desc)); } } return 0; invalid_fld: ata_scsi_set_invalid_field(dev, args->cmd, fp, bp); return 1; saving_not_supp: ata_scsi_set_sense(dev, args->cmd, ILLEGAL_REQUEST, 0x39, 0x0); /* "Saving parameters not supported" */ return 1; } /** * ata_scsiop_read_cap - Simulate READ CAPACITY[ 16] commands * @args: device IDENTIFY data / SCSI command of interest. * @rbuf: Response buffer, to which simulated SCSI cmd output is sent. * * Simulate READ CAPACITY commands. * * LOCKING: * None. */ static unsigned int ata_scsiop_read_cap(struct ata_scsi_args *args, u8 *rbuf) { struct ata_device *dev = args->dev; u64 last_lba = dev->n_sectors - 1; /* LBA of the last block */ u32 sector_size; /* physical sector size in bytes */ u8 log2_per_phys; u16 lowest_aligned; sector_size = ata_id_logical_sector_size(dev->id); log2_per_phys = ata_id_log2_per_physical_sector(dev->id); lowest_aligned = ata_id_logical_sector_offset(dev->id, log2_per_phys); VPRINTK("ENTER\n"); if (args->cmd->cmnd[0] == READ_CAPACITY) { if (last_lba >= 0xffffffffULL) last_lba = 0xffffffff; /* sector count, 32-bit */ rbuf[0] = last_lba >> (8 * 3); rbuf[1] = last_lba >> (8 * 2); rbuf[2] = last_lba >> (8 * 1); rbuf[3] = last_lba; /* sector size */ rbuf[4] = sector_size >> (8 * 3); rbuf[5] = sector_size >> (8 * 2); rbuf[6] = sector_size >> (8 * 1); rbuf[7] = sector_size; } else { /* sector count, 64-bit */ rbuf[0] = last_lba >> (8 * 7); rbuf[1] = last_lba >> (8 * 6); rbuf[2] = last_lba >> (8 * 5); rbuf[3] = last_lba >> (8 * 4); rbuf[4] = last_lba >> (8 * 3); rbuf[5] = last_lba >> (8 * 2); rbuf[6] = last_lba >> (8 * 1); rbuf[7] = last_lba; /* sector size */ rbuf[ 8] = sector_size >> (8 * 3); rbuf[ 9] = sector_size >> (8 * 2); rbuf[10] = sector_size >> (8 * 1); rbuf[11] = sector_size; rbuf[12] = 0; rbuf[13] = log2_per_phys; rbuf[14] = (lowest_aligned >> 8) & 0x3f; rbuf[15] = lowest_aligned; if (ata_id_has_trim(args->id) && !(dev->horkage & ATA_HORKAGE_NOTRIM)) { rbuf[14] |= 0x80; /* LBPME */ if (ata_id_has_zero_after_trim(args->id) && dev->horkage & ATA_HORKAGE_ZERO_AFTER_TRIM) { ata_dev_info(dev, "Enabling discard_zeroes_data\n"); rbuf[14] |= 0x40; /* LBPRZ */ } } if (ata_id_zoned_cap(args->id) || args->dev->class == ATA_DEV_ZAC) rbuf[12] = (1 << 4); /* RC_BASIS */ } return 0; } /** * ata_scsiop_report_luns - Simulate REPORT LUNS command * @args: device IDENTIFY data / SCSI command of interest. * @rbuf: Response buffer, to which simulated SCSI cmd output is sent. * * Simulate REPORT LUNS command. * * LOCKING: * spin_lock_irqsave(host lock) */ static unsigned int ata_scsiop_report_luns(struct ata_scsi_args *args, u8 *rbuf) { VPRINTK("ENTER\n"); rbuf[3] = 8; /* just one lun, LUN 0, size 8 bytes */ return 0; } static void atapi_sense_complete(struct ata_queued_cmd *qc) { if (qc->err_mask && ((qc->err_mask & AC_ERR_DEV) == 0)) { /* FIXME: not quite right; we don't want the * translation of taskfile registers into * a sense descriptors, since that's only * correct for ATA, not ATAPI */ ata_gen_passthru_sense(qc); } ata_qc_done(qc); } /* is it pointless to prefer PIO for "safety reasons"? */ static inline int ata_pio_use_silly(struct ata_port *ap) { return (ap->flags & ATA_FLAG_PIO_DMA); } static void atapi_request_sense(struct ata_queued_cmd *qc) { struct ata_port *ap = qc->ap; struct scsi_cmnd *cmd = qc->scsicmd; DPRINTK("ATAPI request sense\n"); memset(cmd->sense_buffer, 0, SCSI_SENSE_BUFFERSIZE); #ifdef CONFIG_ATA_SFF if (ap->ops->sff_tf_read) ap->ops->sff_tf_read(ap, &qc->tf); #endif /* fill these in, for the case where they are -not- overwritten */ cmd->sense_buffer[0] = 0x70; cmd->sense_buffer[2] = qc->tf.feature >> 4; ata_qc_reinit(qc); /* setup sg table and init transfer direction */ sg_init_one(&qc->sgent, cmd->sense_buffer, SCSI_SENSE_BUFFERSIZE); ata_sg_init(qc, &qc->sgent, 1); qc->dma_dir = DMA_FROM_DEVICE; memset(&qc->cdb, 0, qc->dev->cdb_len); qc->cdb[0] = REQUEST_SENSE; qc->cdb[4] = SCSI_SENSE_BUFFERSIZE; qc->tf.flags |= ATA_TFLAG_ISADDR | ATA_TFLAG_DEVICE; qc->tf.command = ATA_CMD_PACKET; if (ata_pio_use_silly(ap)) { qc->tf.protocol = ATAPI_PROT_DMA; qc->tf.feature |= ATAPI_PKT_DMA; } else { qc->tf.protocol = ATAPI_PROT_PIO; qc->tf.lbam = SCSI_SENSE_BUFFERSIZE; qc->tf.lbah = 0; } qc->nbytes = SCSI_SENSE_BUFFERSIZE; qc->complete_fn = atapi_sense_complete; ata_qc_issue(qc); DPRINTK("EXIT\n"); } /* * ATAPI devices typically report zero for their SCSI version, and sometimes * deviate from the spec WRT response data format. If SCSI version is * reported as zero like normal, then we make the following fixups: * 1) Fake MMC-5 version, to indicate to the Linux scsi midlayer this is a * modern device. * 2) Ensure response data format / ATAPI information are always correct. */ static void atapi_fixup_inquiry(struct scsi_cmnd *cmd) { u8 buf[4]; sg_copy_to_buffer(scsi_sglist(cmd), scsi_sg_count(cmd), buf, 4); if (buf[2] == 0) { buf[2] = 0x5; buf[3] = 0x32; } sg_copy_from_buffer(scsi_sglist(cmd), scsi_sg_count(cmd), buf, 4); } static void atapi_qc_complete(struct ata_queued_cmd *qc) { struct scsi_cmnd *cmd = qc->scsicmd; unsigned int err_mask = qc->err_mask; VPRINTK("ENTER, err_mask 0x%X\n", err_mask); /* handle completion from new EH */ if (unlikely(qc->ap->ops->error_handler && (err_mask || qc->flags & ATA_QCFLAG_SENSE_VALID))) { if (!(qc->flags & ATA_QCFLAG_SENSE_VALID)) { /* FIXME: not quite right; we don't want the * translation of taskfile registers into a * sense descriptors, since that's only * correct for ATA, not ATAPI */ ata_gen_passthru_sense(qc); } /* SCSI EH automatically locks door if sdev->locked is * set. Sometimes door lock request continues to * fail, for example, when no media is present. This * creates a loop - SCSI EH issues door lock which * fails and gets invoked again to acquire sense data * for the failed command. * * If door lock fails, always clear sdev->locked to * avoid this infinite loop. * * This may happen before SCSI scan is complete. Make * sure qc->dev->sdev isn't NULL before dereferencing. */ if (qc->cdb[0] == ALLOW_MEDIUM_REMOVAL && qc->dev->sdev) qc->dev->sdev->locked = 0; qc->scsicmd->result = SAM_STAT_CHECK_CONDITION; ata_qc_done(qc); return; } /* successful completion or old EH failure path */ if (unlikely(err_mask & AC_ERR_DEV)) { cmd->result = SAM_STAT_CHECK_CONDITION; atapi_request_sense(qc); return; } else if (unlikely(err_mask)) { /* FIXME: not quite right; we don't want the * translation of taskfile registers into * a sense descriptors, since that's only * correct for ATA, not ATAPI */ ata_gen_passthru_sense(qc); } else { if (cmd->cmnd[0] == INQUIRY && (cmd->cmnd[1] & 0x03) == 0) atapi_fixup_inquiry(cmd); cmd->result = SAM_STAT_GOOD; } ata_qc_done(qc); } /** * atapi_xlat - Initialize PACKET taskfile * @qc: command structure to be initialized * * LOCKING: * spin_lock_irqsave(host lock) * * RETURNS: * Zero on success, non-zero on failure. */ static unsigned int atapi_xlat(struct ata_queued_cmd *qc) { struct scsi_cmnd *scmd = qc->scsicmd; struct ata_device *dev = qc->dev; int nodata = (scmd->sc_data_direction == DMA_NONE); int using_pio = !nodata && (dev->flags & ATA_DFLAG_PIO); unsigned int nbytes; memset(qc->cdb, 0, dev->cdb_len); memcpy(qc->cdb, scmd->cmnd, scmd->cmd_len); qc->complete_fn = atapi_qc_complete; qc->tf.flags |= ATA_TFLAG_ISADDR | ATA_TFLAG_DEVICE; if (scmd->sc_data_direction == DMA_TO_DEVICE) { qc->tf.flags |= ATA_TFLAG_WRITE; DPRINTK("direction: write\n"); } qc->tf.command = ATA_CMD_PACKET; ata_qc_set_pc_nbytes(qc); /* check whether ATAPI DMA is safe */ if (!nodata && !using_pio && atapi_check_dma(qc)) using_pio = 1; /* Some controller variants snoop this value for Packet * transfers to do state machine and FIFO management. Thus we * want to set it properly, and for DMA where it is * effectively meaningless. */ nbytes = min(ata_qc_raw_nbytes(qc), (unsigned int)63 * 1024); /* Most ATAPI devices which honor transfer chunk size don't * behave according to the spec when odd chunk size which * matches the transfer length is specified. If the number of * bytes to transfer is 2n+1. According to the spec, what * should happen is to indicate that 2n+1 is going to be * transferred and transfer 2n+2 bytes where the last byte is * padding. * * In practice, this doesn't happen. ATAPI devices first * indicate and transfer 2n bytes and then indicate and * transfer 2 bytes where the last byte is padding. * * This inconsistency confuses several controllers which * perform PIO using DMA such as Intel AHCIs and sil3124/32. * These controllers use actual number of transferred bytes to * update DMA pointer and transfer of 4n+2 bytes make those * controller push DMA pointer by 4n+4 bytes because SATA data * FISes are aligned to 4 bytes. This causes data corruption * and buffer overrun. * * Always setting nbytes to even number solves this problem * because then ATAPI devices don't have to split data at 2n * boundaries. */ if (nbytes & 0x1) nbytes++; qc->tf.lbam = (nbytes & 0xFF); qc->tf.lbah = (nbytes >> 8); if (nodata) qc->tf.protocol = ATAPI_PROT_NODATA; else if (using_pio) qc->tf.protocol = ATAPI_PROT_PIO; else { /* DMA data xfer */ qc->tf.protocol = ATAPI_PROT_DMA; qc->tf.feature |= ATAPI_PKT_DMA; if ((dev->flags & ATA_DFLAG_DMADIR) && (scmd->sc_data_direction != DMA_TO_DEVICE)) /* some SATA bridges need us to indicate data xfer direction */ qc->tf.feature |= ATAPI_DMADIR; } /* FIXME: We need to translate 0x05 READ_BLOCK_LIMITS to a MODE_SENSE as ATAPI tape drives don't get this right otherwise */ return 0; } static struct ata_device *ata_find_dev(struct ata_port *ap, int devno) { if (!sata_pmp_attached(ap)) { if (likely(devno >= 0 && devno < ata_link_max_devices(&ap->link))) return &ap->link.device[devno]; } else { if (likely(devno >= 0 && devno < ap->nr_pmp_links)) return &ap->pmp_link[devno].device[0]; } return NULL; } static struct ata_device *__ata_scsi_find_dev(struct ata_port *ap, const struct scsi_device *scsidev) { int devno; /* skip commands not addressed to targets we simulate */ if (!sata_pmp_attached(ap)) { if (unlikely(scsidev->channel || scsidev->lun)) return NULL; devno = scsidev->id; } else { if (unlikely(scsidev->id || scsidev->lun)) return NULL; devno = scsidev->channel; } return ata_find_dev(ap, devno); } /** * ata_scsi_find_dev - lookup ata_device from scsi_cmnd * @ap: ATA port to which the device is attached * @scsidev: SCSI device from which we derive the ATA device * * Given various information provided in struct scsi_cmnd, * map that onto an ATA bus, and using that mapping * determine which ata_device is associated with the * SCSI command to be sent. * * LOCKING: * spin_lock_irqsave(host lock) * * RETURNS: * Associated ATA device, or %NULL if not found. */ struct ata_device * ata_scsi_find_dev(struct ata_port *ap, const struct scsi_device *scsidev) { struct ata_device *dev = __ata_scsi_find_dev(ap, scsidev); if (unlikely(!dev || !ata_dev_enabled(dev))) return NULL; return dev; } /* * ata_scsi_map_proto - Map pass-thru protocol value to taskfile value. * @byte1: Byte 1 from pass-thru CDB. * * RETURNS: * ATA_PROT_UNKNOWN if mapping failed/unimplemented, protocol otherwise. */ static u8 ata_scsi_map_proto(u8 byte1) { switch((byte1 & 0x1e) >> 1) { case 3: /* Non-data */ return ATA_PROT_NODATA; case 6: /* DMA */ case 10: /* UDMA Data-in */ case 11: /* UDMA Data-Out */ return ATA_PROT_DMA; case 4: /* PIO Data-in */ case 5: /* PIO Data-out */ return ATA_PROT_PIO; case 12: /* FPDMA */ return ATA_PROT_NCQ; case 0: /* Hard Reset */ case 1: /* SRST */ case 8: /* Device Diagnostic */ case 9: /* Device Reset */ case 7: /* DMA Queued */ case 15: /* Return Response Info */ default: /* Reserved */ break; } return ATA_PROT_UNKNOWN; } /** * ata_scsi_pass_thru - convert ATA pass-thru CDB to taskfile * @qc: command structure to be initialized * * Handles either 12, 16, or 32-byte versions of the CDB. * * RETURNS: * Zero on success, non-zero on failure. */ static unsigned int ata_scsi_pass_thru(struct ata_queued_cmd *qc) { struct ata_taskfile *tf = &(qc->tf); struct scsi_cmnd *scmd = qc->scsicmd; struct ata_device *dev = qc->dev; const u8 *cdb = scmd->cmnd; u16 fp; u16 cdb_offset = 0; /* 7Fh variable length cmd means a ata pass-thru(32) */ if (cdb[0] == VARIABLE_LENGTH_CMD) cdb_offset = 9; tf->protocol = ata_scsi_map_proto(cdb[1 + cdb_offset]); if (tf->protocol == ATA_PROT_UNKNOWN) { fp = 1; goto invalid_fld; } if (ata_is_ncq(tf->protocol) && (cdb[2 + cdb_offset] & 0x3) == 0) tf->protocol = ATA_PROT_NCQ_NODATA; /* enable LBA */ tf->flags |= ATA_TFLAG_LBA; /* * 12 and 16 byte CDBs use different offsets to * provide the various register values. */ if (cdb[0] == ATA_16) { /* * 16-byte CDB - may contain extended commands. * * If that is the case, copy the upper byte register values. */ if (cdb[1] & 0x01) { tf->hob_feature = cdb[3]; tf->hob_nsect = cdb[5]; tf->hob_lbal = cdb[7]; tf->hob_lbam = cdb[9]; tf->hob_lbah = cdb[11]; tf->flags |= ATA_TFLAG_LBA48; } else tf->flags &= ~ATA_TFLAG_LBA48; /* * Always copy low byte, device and command registers. */ tf->feature = cdb[4]; tf->nsect = cdb[6]; tf->lbal = cdb[8]; tf->lbam = cdb[10]; tf->lbah = cdb[12]; tf->device = cdb[13]; tf->command = cdb[14]; } else if (cdb[0] == ATA_12) { /* * 12-byte CDB - incapable of extended commands. */ tf->flags &= ~ATA_TFLAG_LBA48; tf->feature = cdb[3]; tf->nsect = cdb[4]; tf->lbal = cdb[5]; tf->lbam = cdb[6]; tf->lbah = cdb[7]; tf->device = cdb[8]; tf->command = cdb[9]; } else { /* * 32-byte CDB - may contain extended command fields. * * If that is the case, copy the upper byte register values. */ if (cdb[10] & 0x01) { tf->hob_feature = cdb[20]; tf->hob_nsect = cdb[22]; tf->hob_lbal = cdb[16]; tf->hob_lbam = cdb[15]; tf->hob_lbah = cdb[14]; tf->flags |= ATA_TFLAG_LBA48; } else tf->flags &= ~ATA_TFLAG_LBA48; tf->feature = cdb[21]; tf->nsect = cdb[23]; tf->lbal = cdb[19]; tf->lbam = cdb[18]; tf->lbah = cdb[17]; tf->device = cdb[24]; tf->command = cdb[25]; tf->auxiliary = get_unaligned_be32(&cdb[28]); } /* For NCQ commands copy the tag value */ if (ata_is_ncq(tf->protocol)) tf->nsect = qc->hw_tag << 3; /* enforce correct master/slave bit */ tf->device = dev->devno ? tf->device | ATA_DEV1 : tf->device & ~ATA_DEV1; switch (tf->command) { /* READ/WRITE LONG use a non-standard sect_size */ case ATA_CMD_READ_LONG: case ATA_CMD_READ_LONG_ONCE: case ATA_CMD_WRITE_LONG: case ATA_CMD_WRITE_LONG_ONCE: if (tf->protocol != ATA_PROT_PIO || tf->nsect != 1) { fp = 1; goto invalid_fld; } qc->sect_size = scsi_bufflen(scmd); break; /* commands using reported Logical Block size (e.g. 512 or 4K) */ case ATA_CMD_CFA_WRITE_NE: case ATA_CMD_CFA_TRANS_SECT: case ATA_CMD_CFA_WRITE_MULT_NE: /* XXX: case ATA_CMD_CFA_WRITE_SECTORS_WITHOUT_ERASE: */ case ATA_CMD_READ: case ATA_CMD_READ_EXT: case ATA_CMD_READ_QUEUED: /* XXX: case ATA_CMD_READ_QUEUED_EXT: */ case ATA_CMD_FPDMA_READ: case ATA_CMD_READ_MULTI: case ATA_CMD_READ_MULTI_EXT: case ATA_CMD_PIO_READ: case ATA_CMD_PIO_READ_EXT: case ATA_CMD_READ_STREAM_DMA_EXT: case ATA_CMD_READ_STREAM_EXT: case ATA_CMD_VERIFY: case ATA_CMD_VERIFY_EXT: case ATA_CMD_WRITE: case ATA_CMD_WRITE_EXT: case ATA_CMD_WRITE_FUA_EXT: case ATA_CMD_WRITE_QUEUED: case ATA_CMD_WRITE_QUEUED_FUA_EXT: case ATA_CMD_FPDMA_WRITE: case ATA_CMD_WRITE_MULTI: case ATA_CMD_WRITE_MULTI_EXT: case ATA_CMD_WRITE_MULTI_FUA_EXT: case ATA_CMD_PIO_WRITE: case ATA_CMD_PIO_WRITE_EXT: case ATA_CMD_WRITE_STREAM_DMA_EXT: case ATA_CMD_WRITE_STREAM_EXT: qc->sect_size = scmd->device->sector_size; break; /* Everything else uses 512 byte "sectors" */ default: qc->sect_size = ATA_SECT_SIZE; } /* * Set flags so that all registers will be written, pass on * write indication (used for PIO/DMA setup), result TF is * copied back and we don't whine too much about its failure. */ tf->flags |= ATA_TFLAG_ISADDR | ATA_TFLAG_DEVICE; if (scmd->sc_data_direction == DMA_TO_DEVICE) tf->flags |= ATA_TFLAG_WRITE; qc->flags |= ATA_QCFLAG_RESULT_TF | ATA_QCFLAG_QUIET; /* * Set transfer length. * * TODO: find out if we need to do more here to * cover scatter/gather case. */ ata_qc_set_pc_nbytes(qc); /* We may not issue DMA commands if no DMA mode is set */ if (tf->protocol == ATA_PROT_DMA && dev->dma_mode == 0) { fp = 1; goto invalid_fld; } /* We may not issue NCQ commands to devices not supporting NCQ */ if (ata_is_ncq(tf->protocol) && !ata_ncq_enabled(dev)) { fp = 1; goto invalid_fld; } /* sanity check for pio multi commands */ if ((cdb[1] & 0xe0) && !is_multi_taskfile(tf)) { fp = 1; goto invalid_fld; } if (is_multi_taskfile(tf)) { unsigned int multi_count = 1 << (cdb[1] >> 5); /* compare the passed through multi_count * with the cached multi_count of libata */ if (multi_count != dev->multi_count) ata_dev_warn(dev, "invalid multi_count %u ignored\n", multi_count); } /* * Filter SET_FEATURES - XFER MODE command -- otherwise, * SET_FEATURES - XFER MODE must be preceded/succeeded * by an update to hardware-specific registers for each * controller (i.e. the reason for ->set_piomode(), * ->set_dmamode(), and ->post_set_mode() hooks). */ if (tf->command == ATA_CMD_SET_FEATURES && tf->feature == SETFEATURES_XFER) { fp = (cdb[0] == ATA_16) ? 4 : 3; goto invalid_fld; } /* * Filter TPM commands by default. These provide an * essentially uncontrolled encrypted "back door" between * applications and the disk. Set libata.allow_tpm=1 if you * have a real reason for wanting to use them. This ensures * that installed software cannot easily mess stuff up without * user intent. DVR type users will probably ship with this enabled * for movie content management. * * Note that for ATA8 we can issue a DCS change and DCS freeze lock * for this and should do in future but that it is not sufficient as * DCS is an optional feature set. Thus we also do the software filter * so that we comply with the TC consortium stated goal that the user * can turn off TC features of their system. */ if (tf->command >= 0x5C && tf->command <= 0x5F && !libata_allow_tpm) { fp = (cdb[0] == ATA_16) ? 14 : 9; goto invalid_fld; } return 0; invalid_fld: ata_scsi_set_invalid_field(dev, scmd, fp, 0xff); return 1; } /** * ata_format_dsm_trim_descr() - SATL Write Same to DSM Trim * @cmd: SCSI command being translated * @trmax: Maximum number of entries that will fit in sector_size bytes. * @sector: Starting sector * @count: Total Range of request in logical sectors * * Rewrite the WRITE SAME descriptor to be a DSM TRIM little-endian formatted * descriptor. * * Upto 64 entries of the format: * 63:48 Range Length * 47:0 LBA * * Range Length of 0 is ignored. * LBA's should be sorted order and not overlap. * * NOTE: this is the same format as ADD LBA(S) TO NV CACHE PINNED SET * * Return: Number of bytes copied into sglist. */ static size_t ata_format_dsm_trim_descr(struct scsi_cmnd *cmd, u32 trmax, u64 sector, u32 count) { struct scsi_device *sdp = cmd->device; size_t len = sdp->sector_size; size_t r; __le64 *buf; u32 i = 0; unsigned long flags; WARN_ON(len > ATA_SCSI_RBUF_SIZE); if (len > ATA_SCSI_RBUF_SIZE) len = ATA_SCSI_RBUF_SIZE; spin_lock_irqsave(&ata_scsi_rbuf_lock, flags); buf = ((void *)ata_scsi_rbuf); memset(buf, 0, len); while (i < trmax) { u64 entry = sector | ((u64)(count > 0xffff ? 0xffff : count) << 48); buf[i++] = __cpu_to_le64(entry); if (count <= 0xffff) break; count -= 0xffff; sector += 0xffff; } r = sg_copy_from_buffer(scsi_sglist(cmd), scsi_sg_count(cmd), buf, len); spin_unlock_irqrestore(&ata_scsi_rbuf_lock, flags); return r; } /** * ata_scsi_write_same_xlat() - SATL Write Same to ATA SCT Write Same * @qc: Command to be translated * * Translate a SCSI WRITE SAME command to be either a DSM TRIM command or * an SCT Write Same command. * Based on WRITE SAME has the UNMAP flag: * * - When set translate to DSM TRIM * - When clear translate to SCT Write Same */ static unsigned int ata_scsi_write_same_xlat(struct ata_queued_cmd *qc) { struct ata_taskfile *tf = &qc->tf; struct scsi_cmnd *scmd = qc->scsicmd; struct scsi_device *sdp = scmd->device; size_t len = sdp->sector_size; struct ata_device *dev = qc->dev; const u8 *cdb = scmd->cmnd; u64 block; u32 n_block; const u32 trmax = len >> 3; u32 size; u16 fp; u8 bp = 0xff; u8 unmap = cdb[1] & 0x8; /* we may not issue DMA commands if no DMA mode is set */ if (unlikely(!dev->dma_mode)) goto invalid_opcode; /* * We only allow sending this command through the block layer, * as it modifies the DATA OUT buffer, which would corrupt user * memory for SG_IO commands. */ if (unlikely(blk_rq_is_passthrough(scmd->request))) goto invalid_opcode; if (unlikely(scmd->cmd_len < 16)) { fp = 15; goto invalid_fld; } scsi_16_lba_len(cdb, &block, &n_block); if (!unmap || (dev->horkage & ATA_HORKAGE_NOTRIM) || !ata_id_has_trim(dev->id)) { fp = 1; bp = 3; goto invalid_fld; } /* If the request is too large the cmd is invalid */ if (n_block > 0xffff * trmax) { fp = 2; goto invalid_fld; } /* * WRITE SAME always has a sector sized buffer as payload, this * should never be a multiple entry S/G list. */ if (!scsi_sg_count(scmd)) goto invalid_param_len; /* * size must match sector size in bytes * For DATA SET MANAGEMENT TRIM in ACS-2 nsect (aka count) * is defined as number of 512 byte blocks to be transferred. */ size = ata_format_dsm_trim_descr(scmd, trmax, block, n_block); if (size != len) goto invalid_param_len; if (ata_ncq_enabled(dev) && ata_fpdma_dsm_supported(dev)) { /* Newer devices support queued TRIM commands */ tf->protocol = ATA_PROT_NCQ; tf->command = ATA_CMD_FPDMA_SEND; tf->hob_nsect = ATA_SUBCMD_FPDMA_SEND_DSM & 0x1f; tf->nsect = qc->hw_tag << 3; tf->hob_feature = (size / 512) >> 8; tf->feature = size / 512; tf->auxiliary = 1; } else { tf->protocol = ATA_PROT_DMA; tf->hob_feature = 0; tf->feature = ATA_DSM_TRIM; tf->hob_nsect = (size / 512) >> 8; tf->nsect = size / 512; tf->command = ATA_CMD_DSM; } tf->flags |= ATA_TFLAG_ISADDR | ATA_TFLAG_DEVICE | ATA_TFLAG_LBA48 | ATA_TFLAG_WRITE; ata_qc_set_pc_nbytes(qc); return 0; invalid_fld: ata_scsi_set_invalid_field(dev, scmd, fp, bp); return 1; invalid_param_len: /* "Parameter list length error" */ ata_scsi_set_sense(dev, scmd, ILLEGAL_REQUEST, 0x1a, 0x0); return 1; invalid_opcode: /* "Invalid command operation code" */ ata_scsi_set_sense(dev, scmd, ILLEGAL_REQUEST, 0x20, 0x0); return 1; } /** * ata_scsiop_maint_in - Simulate a subset of MAINTENANCE_IN * @args: device MAINTENANCE_IN data / SCSI command of interest. * @rbuf: Response buffer, to which simulated SCSI cmd output is sent. * * Yields a subset to satisfy scsi_report_opcode() * * LOCKING: * spin_lock_irqsave(host lock) */ static unsigned int ata_scsiop_maint_in(struct ata_scsi_args *args, u8 *rbuf) { struct ata_device *dev = args->dev; u8 *cdb = args->cmd->cmnd; u8 supported = 0; unsigned int err = 0; if (cdb[2] != 1) { ata_dev_warn(dev, "invalid command format %d\n", cdb[2]); err = 2; goto out; } switch (cdb[3]) { case INQUIRY: case MODE_SENSE: case MODE_SENSE_10: case READ_CAPACITY: case SERVICE_ACTION_IN_16: case REPORT_LUNS: case REQUEST_SENSE: case SYNCHRONIZE_CACHE: case REZERO_UNIT: case SEEK_6: case SEEK_10: case TEST_UNIT_READY: case SEND_DIAGNOSTIC: case MAINTENANCE_IN: case READ_6: case READ_10: case READ_16: case WRITE_6: case WRITE_10: case WRITE_16: case ATA_12: case ATA_16: case VERIFY: case VERIFY_16: case MODE_SELECT: case MODE_SELECT_10: case START_STOP: supported = 3; break; case ZBC_IN: case ZBC_OUT: if (ata_id_zoned_cap(dev->id) || dev->class == ATA_DEV_ZAC) supported = 3; break; case SECURITY_PROTOCOL_IN: case SECURITY_PROTOCOL_OUT: if (dev->flags & ATA_DFLAG_TRUSTED) supported = 3; break; default: break; } out: rbuf[1] = supported; /* supported */ return err; } /** * ata_scsi_report_zones_complete - convert ATA output * @qc: command structure returning the data * * Convert T-13 little-endian field representation into * T-10 big-endian field representation. * What a mess. */ static void ata_scsi_report_zones_complete(struct ata_queued_cmd *qc) { struct scsi_cmnd *scmd = qc->scsicmd; struct sg_mapping_iter miter; unsigned long flags; unsigned int bytes = 0; sg_miter_start(&miter, scsi_sglist(scmd), scsi_sg_count(scmd), SG_MITER_TO_SG | SG_MITER_ATOMIC); local_irq_save(flags); while (sg_miter_next(&miter)) { unsigned int offset = 0; if (bytes == 0) { char *hdr; u32 list_length; u64 max_lba, opt_lba; u16 same; /* Swizzle header */ hdr = miter.addr; list_length = get_unaligned_le32(&hdr[0]); same = get_unaligned_le16(&hdr[4]); max_lba = get_unaligned_le64(&hdr[8]); opt_lba = get_unaligned_le64(&hdr[16]); put_unaligned_be32(list_length, &hdr[0]); hdr[4] = same & 0xf; put_unaligned_be64(max_lba, &hdr[8]); put_unaligned_be64(opt_lba, &hdr[16]); offset += 64; bytes += 64; } while (offset < miter.length) { char *rec; u8 cond, type, non_seq, reset; u64 size, start, wp; /* Swizzle zone descriptor */ rec = miter.addr + offset; type = rec[0] & 0xf; cond = (rec[1] >> 4) & 0xf; non_seq = (rec[1] & 2); reset = (rec[1] & 1); size = get_unaligned_le64(&rec[8]); start = get_unaligned_le64(&rec[16]); wp = get_unaligned_le64(&rec[24]); rec[0] = type; rec[1] = (cond << 4) | non_seq | reset; put_unaligned_be64(size, &rec[8]); put_unaligned_be64(start, &rec[16]); put_unaligned_be64(wp, &rec[24]); WARN_ON(offset + 64 > miter.length); offset += 64; bytes += 64; } } sg_miter_stop(&miter); local_irq_restore(flags); ata_scsi_qc_complete(qc); } static unsigned int ata_scsi_zbc_in_xlat(struct ata_queued_cmd *qc) { struct ata_taskfile *tf = &qc->tf; struct scsi_cmnd *scmd = qc->scsicmd; const u8 *cdb = scmd->cmnd; u16 sect, fp = (u16)-1; u8 sa, options, bp = 0xff; u64 block; u32 n_block; if (unlikely(scmd->cmd_len < 16)) { ata_dev_warn(qc->dev, "invalid cdb length %d\n", scmd->cmd_len); fp = 15; goto invalid_fld; } scsi_16_lba_len(cdb, &block, &n_block); if (n_block != scsi_bufflen(scmd)) { ata_dev_warn(qc->dev, "non-matching transfer count (%d/%d)\n", n_block, scsi_bufflen(scmd)); goto invalid_param_len; } sa = cdb[1] & 0x1f; if (sa != ZI_REPORT_ZONES) { ata_dev_warn(qc->dev, "invalid service action %d\n", sa); fp = 1; goto invalid_fld; } /* * ZAC allows only for transfers in 512 byte blocks, * and uses a 16 bit value for the transfer count. */ if ((n_block / 512) > 0xffff || n_block < 512 || (n_block % 512)) { ata_dev_warn(qc->dev, "invalid transfer count %d\n", n_block); goto invalid_param_len; } sect = n_block / 512; options = cdb[14] & 0xbf; if (ata_ncq_enabled(qc->dev) && ata_fpdma_zac_mgmt_in_supported(qc->dev)) { tf->protocol = ATA_PROT_NCQ; tf->command = ATA_CMD_FPDMA_RECV; tf->hob_nsect = ATA_SUBCMD_FPDMA_RECV_ZAC_MGMT_IN & 0x1f; tf->nsect = qc->hw_tag << 3; tf->feature = sect & 0xff; tf->hob_feature = (sect >> 8) & 0xff; tf->auxiliary = ATA_SUBCMD_ZAC_MGMT_IN_REPORT_ZONES | (options << 8); } else { tf->command = ATA_CMD_ZAC_MGMT_IN; tf->feature = ATA_SUBCMD_ZAC_MGMT_IN_REPORT_ZONES; tf->protocol = ATA_PROT_DMA; tf->hob_feature = options; tf->hob_nsect = (sect >> 8) & 0xff; tf->nsect = sect & 0xff; } tf->device = ATA_LBA; tf->lbah = (block >> 16) & 0xff; tf->lbam = (block >> 8) & 0xff; tf->lbal = block & 0xff; tf->hob_lbah = (block >> 40) & 0xff; tf->hob_lbam = (block >> 32) & 0xff; tf->hob_lbal = (block >> 24) & 0xff; tf->flags |= ATA_TFLAG_ISADDR | ATA_TFLAG_DEVICE | ATA_TFLAG_LBA48; qc->flags |= ATA_QCFLAG_RESULT_TF; ata_qc_set_pc_nbytes(qc); qc->complete_fn = ata_scsi_report_zones_complete; return 0; invalid_fld: ata_scsi_set_invalid_field(qc->dev, scmd, fp, bp); return 1; invalid_param_len: /* "Parameter list length error" */ ata_scsi_set_sense(qc->dev, scmd, ILLEGAL_REQUEST, 0x1a, 0x0); return 1; } static unsigned int ata_scsi_zbc_out_xlat(struct ata_queued_cmd *qc) { struct ata_taskfile *tf = &qc->tf; struct scsi_cmnd *scmd = qc->scsicmd; struct ata_device *dev = qc->dev; const u8 *cdb = scmd->cmnd; u8 all, sa; u64 block; u32 n_block; u16 fp = (u16)-1; if (unlikely(scmd->cmd_len < 16)) { fp = 15; goto invalid_fld; } sa = cdb[1] & 0x1f; if ((sa != ZO_CLOSE_ZONE) && (sa != ZO_FINISH_ZONE) && (sa != ZO_OPEN_ZONE) && (sa != ZO_RESET_WRITE_POINTER)) { fp = 1; goto invalid_fld; } scsi_16_lba_len(cdb, &block, &n_block); if (n_block) { /* * ZAC MANAGEMENT OUT doesn't define any length */ goto invalid_param_len; } all = cdb[14] & 0x1; if (all) { /* * Ignore the block address (zone ID) as defined by ZBC. */ block = 0; } else if (block >= dev->n_sectors) { /* * Block must be a valid zone ID (a zone start LBA). */ fp = 2; goto invalid_fld; } if (ata_ncq_enabled(qc->dev) && ata_fpdma_zac_mgmt_out_supported(qc->dev)) { tf->protocol = ATA_PROT_NCQ_NODATA; tf->command = ATA_CMD_NCQ_NON_DATA; tf->feature = ATA_SUBCMD_NCQ_NON_DATA_ZAC_MGMT_OUT; tf->nsect = qc->hw_tag << 3; tf->auxiliary = sa | ((u16)all << 8); } else { tf->protocol = ATA_PROT_NODATA; tf->command = ATA_CMD_ZAC_MGMT_OUT; tf->feature = sa; tf->hob_feature = all; } tf->lbah = (block >> 16) & 0xff; tf->lbam = (block >> 8) & 0xff; tf->lbal = block & 0xff; tf->hob_lbah = (block >> 40) & 0xff; tf->hob_lbam = (block >> 32) & 0xff; tf->hob_lbal = (block >> 24) & 0xff; tf->device = ATA_LBA; tf->flags |= ATA_TFLAG_ISADDR | ATA_TFLAG_DEVICE | ATA_TFLAG_LBA48; return 0; invalid_fld: ata_scsi_set_invalid_field(qc->dev, scmd, fp, 0xff); return 1; invalid_param_len: /* "Parameter list length error" */ ata_scsi_set_sense(qc->dev, scmd, ILLEGAL_REQUEST, 0x1a, 0x0); return 1; } /** * ata_mselect_caching - Simulate MODE SELECT for caching info page * @qc: Storage for translated ATA taskfile * @buf: input buffer * @len: number of valid bytes in the input buffer * @fp: out parameter for the failed field on error * * Prepare a taskfile to modify caching information for the device. * * LOCKING: * None. */ static int ata_mselect_caching(struct ata_queued_cmd *qc, const u8 *buf, int len, u16 *fp) { struct ata_taskfile *tf = &qc->tf; struct ata_device *dev = qc->dev; u8 mpage[CACHE_MPAGE_LEN]; u8 wce; int i; /* * The first two bytes of def_cache_mpage are a header, so offsets * in mpage are off by 2 compared to buf. Same for len. */ if (len != CACHE_MPAGE_LEN - 2) { if (len < CACHE_MPAGE_LEN - 2) *fp = len; else *fp = CACHE_MPAGE_LEN - 2; return -EINVAL; } wce = buf[0] & (1 << 2); /* * Check that read-only bits are not modified. */ ata_msense_caching(dev->id, mpage, false); for (i = 0; i < CACHE_MPAGE_LEN - 2; i++) { if (i == 0) continue; if (mpage[i + 2] != buf[i]) { *fp = i; return -EINVAL; } } tf->flags |= ATA_TFLAG_DEVICE | ATA_TFLAG_ISADDR; tf->protocol = ATA_PROT_NODATA; tf->nsect = 0; tf->command = ATA_CMD_SET_FEATURES; tf->feature = wce ? SETFEATURES_WC_ON : SETFEATURES_WC_OFF; return 0; } /** * ata_mselect_control - Simulate MODE SELECT for control page * @qc: Storage for translated ATA taskfile * @buf: input buffer * @len: number of valid bytes in the input buffer * @fp: out parameter for the failed field on error * * Prepare a taskfile to modify caching information for the device. * * LOCKING: * None. */ static int ata_mselect_control(struct ata_queued_cmd *qc, const u8 *buf, int len, u16 *fp) { struct ata_device *dev = qc->dev; u8 mpage[CONTROL_MPAGE_LEN]; u8 d_sense; int i; /* * The first two bytes of def_control_mpage are a header, so offsets * in mpage are off by 2 compared to buf. Same for len. */ if (len != CONTROL_MPAGE_LEN - 2) { if (len < CONTROL_MPAGE_LEN - 2) *fp = len; else *fp = CONTROL_MPAGE_LEN - 2; return -EINVAL; } d_sense = buf[0] & (1 << 2); /* * Check that read-only bits are not modified. */ ata_msense_control(dev, mpage, false); for (i = 0; i < CONTROL_MPAGE_LEN - 2; i++) { if (i == 0) continue; if (mpage[2 + i] != buf[i]) { *fp = i; return -EINVAL; } } if (d_sense & (1 << 2)) dev->flags |= ATA_DFLAG_D_SENSE; else dev->flags &= ~ATA_DFLAG_D_SENSE; return 0; } /** * ata_scsi_mode_select_xlat - Simulate MODE SELECT 6, 10 commands * @qc: Storage for translated ATA taskfile * * Converts a MODE SELECT command to an ATA SET FEATURES taskfile. * Assume this is invoked for direct access devices (e.g. disks) only. * There should be no block descriptor for other device types. * * LOCKING: * spin_lock_irqsave(host lock) */ static unsigned int ata_scsi_mode_select_xlat(struct ata_queued_cmd *qc) { struct scsi_cmnd *scmd = qc->scsicmd; const u8 *cdb = scmd->cmnd; u8 pg, spg; unsigned six_byte, pg_len, hdr_len, bd_len; int len; u16 fp = (u16)-1; u8 bp = 0xff; u8 buffer[64]; const u8 *p = buffer; VPRINTK("ENTER\n"); six_byte = (cdb[0] == MODE_SELECT); if (six_byte) { if (scmd->cmd_len < 5) { fp = 4; goto invalid_fld; } len = cdb[4]; hdr_len = 4; } else { if (scmd->cmd_len < 9) { fp = 8; goto invalid_fld; } len = (cdb[7] << 8) + cdb[8]; hdr_len = 8; } /* We only support PF=1, SP=0. */ if ((cdb[1] & 0x11) != 0x10) { fp = 1; bp = (cdb[1] & 0x01) ? 1 : 5; goto invalid_fld; } /* Test early for possible overrun. */ if (!scsi_sg_count(scmd) || scsi_sglist(scmd)->length < len) goto invalid_param_len; /* Move past header and block descriptors. */ if (len < hdr_len) goto invalid_param_len; if (!sg_copy_to_buffer(scsi_sglist(scmd), scsi_sg_count(scmd), buffer, sizeof(buffer))) goto invalid_param_len; if (six_byte) bd_len = p[3]; else bd_len = (p[6] << 8) + p[7]; len -= hdr_len; p += hdr_len; if (len < bd_len) goto invalid_param_len; if (bd_len != 0 && bd_len != 8) { fp = (six_byte) ? 3 : 6; fp += bd_len + hdr_len; goto invalid_param; } len -= bd_len; p += bd_len; if (len == 0) goto skip; /* Parse both possible formats for the mode page headers. */ pg = p[0] & 0x3f; if (p[0] & 0x40) { if (len < 4) goto invalid_param_len; spg = p[1]; pg_len = (p[2] << 8) | p[3]; p += 4; len -= 4; } else { if (len < 2) goto invalid_param_len; spg = 0; pg_len = p[1]; p += 2; len -= 2; } /* * No mode subpages supported (yet) but asking for _all_ * subpages may be valid */ if (spg && (spg != ALL_SUB_MPAGES)) { fp = (p[0] & 0x40) ? 1 : 0; fp += hdr_len + bd_len; goto invalid_param; } if (pg_len > len) goto invalid_param_len; switch (pg) { case CACHE_MPAGE: if (ata_mselect_caching(qc, p, pg_len, &fp) < 0) { fp += hdr_len + bd_len; goto invalid_param; } break; case CONTROL_MPAGE: if (ata_mselect_control(qc, p, pg_len, &fp) < 0) { fp += hdr_len + bd_len; goto invalid_param; } else { goto skip; /* No ATA command to send */ } break; default: /* invalid page code */ fp = bd_len + hdr_len; goto invalid_param; } /* * Only one page has changeable data, so we only support setting one * page at a time. */ if (len > pg_len) goto invalid_param; return 0; invalid_fld: ata_scsi_set_invalid_field(qc->dev, scmd, fp, bp); return 1; invalid_param: ata_scsi_set_invalid_parameter(qc->dev, scmd, fp); return 1; invalid_param_len: /* "Parameter list length error" */ ata_scsi_set_sense(qc->dev, scmd, ILLEGAL_REQUEST, 0x1a, 0x0); return 1; skip: scmd->result = SAM_STAT_GOOD; return 1; } static u8 ata_scsi_trusted_op(u32 len, bool send, bool dma) { if (len == 0) return ATA_CMD_TRUSTED_NONDATA; else if (send) return dma ? ATA_CMD_TRUSTED_SND_DMA : ATA_CMD_TRUSTED_SND; else return dma ? ATA_CMD_TRUSTED_RCV_DMA : ATA_CMD_TRUSTED_RCV; } static unsigned int ata_scsi_security_inout_xlat(struct ata_queued_cmd *qc) { struct scsi_cmnd *scmd = qc->scsicmd; const u8 *cdb = scmd->cmnd; struct ata_taskfile *tf = &qc->tf; u8 secp = cdb[1]; bool send = (cdb[0] == SECURITY_PROTOCOL_OUT); u16 spsp = get_unaligned_be16(&cdb[2]); u32 len = get_unaligned_be32(&cdb[6]); bool dma = !(qc->dev->flags & ATA_DFLAG_PIO); /* * We don't support the ATA "security" protocol. */ if (secp == 0xef) { ata_scsi_set_invalid_field(qc->dev, scmd, 1, 0); return 1; } if (cdb[4] & 7) { /* INC_512 */ if (len > 0xffff) { ata_scsi_set_invalid_field(qc->dev, scmd, 6, 0); return 1; } } else { if (len > 0x01fffe00) { ata_scsi_set_invalid_field(qc->dev, scmd, 6, 0); return 1; } /* convert to the sector-based ATA addressing */ len = (len + 511) / 512; } tf->protocol = dma ? ATA_PROT_DMA : ATA_PROT_PIO; tf->flags |= ATA_TFLAG_DEVICE | ATA_TFLAG_ISADDR | ATA_TFLAG_LBA; if (send) tf->flags |= ATA_TFLAG_WRITE; tf->command = ata_scsi_trusted_op(len, send, dma); tf->feature = secp; tf->lbam = spsp & 0xff; tf->lbah = spsp >> 8; if (len) { tf->nsect = len & 0xff; tf->lbal = len >> 8; } else { if (!send) tf->lbah = (1 << 7); } ata_qc_set_pc_nbytes(qc); return 0; } /** * ata_scsi_var_len_cdb_xlat - SATL variable length CDB to Handler * @qc: Command to be translated * * Translate a SCSI variable length CDB to specified commands. * It checks a service action value in CDB to call corresponding handler. * * RETURNS: * Zero on success, non-zero on failure * */ static unsigned int ata_scsi_var_len_cdb_xlat(struct ata_queued_cmd *qc) { struct scsi_cmnd *scmd = qc->scsicmd; const u8 *cdb = scmd->cmnd; const u16 sa = get_unaligned_be16(&cdb[8]); /* * if service action represents a ata pass-thru(32) command, * then pass it to ata_scsi_pass_thru handler. */ if (sa == ATA_32) return ata_scsi_pass_thru(qc); /* unsupported service action */ return 1; } /** * ata_get_xlat_func - check if SCSI to ATA translation is possible * @dev: ATA device * @cmd: SCSI command opcode to consider * * Look up the SCSI command given, and determine whether the * SCSI command is to be translated or simulated. * * RETURNS: * Pointer to translation function if possible, %NULL if not. */ static inline ata_xlat_func_t ata_get_xlat_func(struct ata_device *dev, u8 cmd) { switch (cmd) { case READ_6: case READ_10: case READ_16: case WRITE_6: case WRITE_10: case WRITE_16: return ata_scsi_rw_xlat; case WRITE_SAME_16: return ata_scsi_write_same_xlat; case SYNCHRONIZE_CACHE: if (ata_try_flush_cache(dev)) return ata_scsi_flush_xlat; break; case VERIFY: case VERIFY_16: return ata_scsi_verify_xlat; case ATA_12: case ATA_16: return ata_scsi_pass_thru; case VARIABLE_LENGTH_CMD: return ata_scsi_var_len_cdb_xlat; case MODE_SELECT: case MODE_SELECT_10: return ata_scsi_mode_select_xlat; break; case ZBC_IN: return ata_scsi_zbc_in_xlat; case ZBC_OUT: return ata_scsi_zbc_out_xlat; case SECURITY_PROTOCOL_IN: case SECURITY_PROTOCOL_OUT: if (!(dev->flags & ATA_DFLAG_TRUSTED)) break; return ata_scsi_security_inout_xlat; case START_STOP: return ata_scsi_start_stop_xlat; } return NULL; } /** * ata_scsi_dump_cdb - dump SCSI command contents to dmesg * @ap: ATA port to which the command was being sent * @cmd: SCSI command to dump * * Prints the contents of a SCSI command via printk(). */ void ata_scsi_dump_cdb(struct ata_port *ap, struct scsi_cmnd *cmd) { #ifdef ATA_VERBOSE_DEBUG struct scsi_device *scsidev = cmd->device; VPRINTK("CDB (%u:%d,%d,%lld) %9ph\n", ap->print_id, scsidev->channel, scsidev->id, scsidev->lun, cmd->cmnd); #endif } int __ata_scsi_queuecmd(struct scsi_cmnd *scmd, struct ata_device *dev) { u8 scsi_op = scmd->cmnd[0]; ata_xlat_func_t xlat_func; int rc = 0; if (dev->class == ATA_DEV_ATA || dev->class == ATA_DEV_ZAC) { if (unlikely(!scmd->cmd_len || scmd->cmd_len > dev->cdb_len)) goto bad_cdb_len; xlat_func = ata_get_xlat_func(dev, scsi_op); } else { if (unlikely(!scmd->cmd_len)) goto bad_cdb_len; xlat_func = NULL; if (likely((scsi_op != ATA_16) || !atapi_passthru16)) { /* relay SCSI command to ATAPI device */ int len = COMMAND_SIZE(scsi_op); if (unlikely(len > scmd->cmd_len || len > dev->cdb_len || scmd->cmd_len > ATAPI_CDB_LEN)) goto bad_cdb_len; xlat_func = atapi_xlat; } else { /* ATA_16 passthru, treat as an ATA command */ if (unlikely(scmd->cmd_len > 16)) goto bad_cdb_len; xlat_func = ata_get_xlat_func(dev, scsi_op); } } if (xlat_func) rc = ata_scsi_translate(dev, scmd, xlat_func); else ata_scsi_simulate(dev, scmd); return rc; bad_cdb_len: DPRINTK("bad CDB len=%u, scsi_op=0x%02x, max=%u\n", scmd->cmd_len, scsi_op, dev->cdb_len); scmd->result = DID_ERROR << 16; scmd->scsi_done(scmd); return 0; } /** * ata_scsi_queuecmd - Issue SCSI cdb to libata-managed device * @shost: SCSI host of command to be sent * @cmd: SCSI command to be sent * * In some cases, this function translates SCSI commands into * ATA taskfiles, and queues the taskfiles to be sent to * hardware. In other cases, this function simulates a * SCSI device by evaluating and responding to certain * SCSI commands. This creates the overall effect of * ATA and ATAPI devices appearing as SCSI devices. * * LOCKING: * ATA host lock * * RETURNS: * Return value from __ata_scsi_queuecmd() if @cmd can be queued, * 0 otherwise. */ int ata_scsi_queuecmd(struct Scsi_Host *shost, struct scsi_cmnd *cmd) { struct ata_port *ap; struct ata_device *dev; struct scsi_device *scsidev = cmd->device; int rc = 0; unsigned long irq_flags; ap = ata_shost_to_port(shost); spin_lock_irqsave(ap->lock, irq_flags); ata_scsi_dump_cdb(ap, cmd); dev = ata_scsi_find_dev(ap, scsidev); if (likely(dev)) rc = __ata_scsi_queuecmd(cmd, dev); else { cmd->result = (DID_BAD_TARGET << 16); cmd->scsi_done(cmd); } spin_unlock_irqrestore(ap->lock, irq_flags); return rc; } EXPORT_SYMBOL_GPL(ata_scsi_queuecmd); /** * ata_scsi_simulate - simulate SCSI command on ATA device * @dev: the target device * @cmd: SCSI command being sent to device. * * Interprets and directly executes a select list of SCSI commands * that can be handled internally. * * LOCKING: * spin_lock_irqsave(host lock) */ void ata_scsi_simulate(struct ata_device *dev, struct scsi_cmnd *cmd) { struct ata_scsi_args args; const u8 *scsicmd = cmd->cmnd; u8 tmp8; args.dev = dev; args.id = dev->id; args.cmd = cmd; switch(scsicmd[0]) { case INQUIRY: if (scsicmd[1] & 2) /* is CmdDt set? */ ata_scsi_set_invalid_field(dev, cmd, 1, 0xff); else if ((scsicmd[1] & 1) == 0) /* is EVPD clear? */ ata_scsi_rbuf_fill(&args, ata_scsiop_inq_std); else switch (scsicmd[2]) { case 0x00: ata_scsi_rbuf_fill(&args, ata_scsiop_inq_00); break; case 0x80: ata_scsi_rbuf_fill(&args, ata_scsiop_inq_80); break; case 0x83: ata_scsi_rbuf_fill(&args, ata_scsiop_inq_83); break; case 0x89: ata_scsi_rbuf_fill(&args, ata_scsiop_inq_89); break; case 0xb0: ata_scsi_rbuf_fill(&args, ata_scsiop_inq_b0); break; case 0xb1: ata_scsi_rbuf_fill(&args, ata_scsiop_inq_b1); break; case 0xb2: ata_scsi_rbuf_fill(&args, ata_scsiop_inq_b2); break; case 0xb6: if (dev->flags & ATA_DFLAG_ZAC) { ata_scsi_rbuf_fill(&args, ata_scsiop_inq_b6); break; } fallthrough; default: ata_scsi_set_invalid_field(dev, cmd, 2, 0xff); break; } break; case MODE_SENSE: case MODE_SENSE_10: ata_scsi_rbuf_fill(&args, ata_scsiop_mode_sense); break; case READ_CAPACITY: ata_scsi_rbuf_fill(&args, ata_scsiop_read_cap); break; case SERVICE_ACTION_IN_16: if ((scsicmd[1] & 0x1f) == SAI_READ_CAPACITY_16) ata_scsi_rbuf_fill(&args, ata_scsiop_read_cap); else ata_scsi_set_invalid_field(dev, cmd, 1, 0xff); break; case REPORT_LUNS: ata_scsi_rbuf_fill(&args, ata_scsiop_report_luns); break; case REQUEST_SENSE: ata_scsi_set_sense(dev, cmd, 0, 0, 0); cmd->result = (DRIVER_SENSE << 24); break; /* if we reach this, then writeback caching is disabled, * turning this into a no-op. */ case SYNCHRONIZE_CACHE: fallthrough; /* no-op's, complete with success */ case REZERO_UNIT: case SEEK_6: case SEEK_10: case TEST_UNIT_READY: break; case SEND_DIAGNOSTIC: tmp8 = scsicmd[1] & ~(1 << 3); if (tmp8 != 0x4 || scsicmd[3] || scsicmd[4]) ata_scsi_set_invalid_field(dev, cmd, 1, 0xff); break; case MAINTENANCE_IN: if (scsicmd[1] == MI_REPORT_SUPPORTED_OPERATION_CODES) ata_scsi_rbuf_fill(&args, ata_scsiop_maint_in); else ata_scsi_set_invalid_field(dev, cmd, 1, 0xff); break; /* all other commands */ default: ata_scsi_set_sense(dev, cmd, ILLEGAL_REQUEST, 0x20, 0x0); /* "Invalid command operation code" */ break; } cmd->scsi_done(cmd); } int ata_scsi_add_hosts(struct ata_host *host, struct scsi_host_template *sht) { int i, rc; for (i = 0; i < host->n_ports; i++) { struct ata_port *ap = host->ports[i]; struct Scsi_Host *shost; rc = -ENOMEM; shost = scsi_host_alloc(sht, sizeof(struct ata_port *)); if (!shost) goto err_alloc; shost->eh_noresume = 1; *(struct ata_port **)&shost->hostdata[0] = ap; ap->scsi_host = shost; shost->transportt = ata_scsi_transport_template; shost->unique_id = ap->print_id; shost->max_id = 16; shost->max_lun = 1; shost->max_channel = 1; shost->max_cmd_len = 32; /* Schedule policy is determined by ->qc_defer() * callback and it needs to see every deferred qc. * Set host_blocked to 1 to prevent SCSI midlayer from * automatically deferring requests. */ shost->max_host_blocked = 1; rc = scsi_add_host_with_dma(shost, &ap->tdev, ap->host->dev); if (rc) goto err_alloc; } return 0; err_alloc: while (--i >= 0) { struct Scsi_Host *shost = host->ports[i]->scsi_host; /* scsi_host_put() is in ata_devres_release() */ scsi_remove_host(shost); } return rc; } #ifdef CONFIG_OF static void ata_scsi_assign_ofnode(struct ata_device *dev, struct ata_port *ap) { struct scsi_device *sdev = dev->sdev; struct device *d = ap->host->dev; struct device_node *np = d->of_node; struct device_node *child; for_each_available_child_of_node(np, child) { int ret; u32 val; ret = of_property_read_u32(child, "reg", &val); if (ret) continue; if (val == dev->devno) { dev_dbg(d, "found matching device node\n"); sdev->sdev_gendev.of_node = child; return; } } } #else static void ata_scsi_assign_ofnode(struct ata_device *dev, struct ata_port *ap) { } #endif void ata_scsi_scan_host(struct ata_port *ap, int sync) { int tries = 5; struct ata_device *last_failed_dev = NULL; struct ata_link *link; struct ata_device *dev; repeat: ata_for_each_link(link, ap, EDGE) { ata_for_each_dev(dev, link, ENABLED) { struct scsi_device *sdev; int channel = 0, id = 0; if (dev->sdev) continue; if (ata_is_host_link(link)) id = dev->devno; else channel = link->pmp; sdev = __scsi_add_device(ap->scsi_host, channel, id, 0, NULL); if (!IS_ERR(sdev)) { dev->sdev = sdev; ata_scsi_assign_ofnode(dev, ap); scsi_device_put(sdev); } else { dev->sdev = NULL; } } } /* If we scanned while EH was in progress or allocation * failure occurred, scan would have failed silently. Check * whether all devices are attached. */ ata_for_each_link(link, ap, EDGE) { ata_for_each_dev(dev, link, ENABLED) { if (!dev->sdev) goto exit_loop; } } exit_loop: if (!link) return; /* we're missing some SCSI devices */ if (sync) { /* If caller requested synchrnous scan && we've made * any progress, sleep briefly and repeat. */ if (dev != last_failed_dev) { msleep(100); last_failed_dev = dev; goto repeat; } /* We might be failing to detect boot device, give it * a few more chances. */ if (--tries) { msleep(100); goto repeat; } ata_port_err(ap, "WARNING: synchronous SCSI scan failed without making any progress, switching to async\n"); } queue_delayed_work(system_long_wq, &ap->hotplug_task, round_jiffies_relative(HZ)); } /** * ata_scsi_offline_dev - offline attached SCSI device * @dev: ATA device to offline attached SCSI device for * * This function is called from ata_eh_hotplug() and responsible * for taking the SCSI device attached to @dev offline. This * function is called with host lock which protects dev->sdev * against clearing. * * LOCKING: * spin_lock_irqsave(host lock) * * RETURNS: * 1 if attached SCSI device exists, 0 otherwise. */ int ata_scsi_offline_dev(struct ata_device *dev) { if (dev->sdev) { scsi_device_set_state(dev->sdev, SDEV_OFFLINE); return 1; } return 0; } /** * ata_scsi_remove_dev - remove attached SCSI device * @dev: ATA device to remove attached SCSI device for * * This function is called from ata_eh_scsi_hotplug() and * responsible for removing the SCSI device attached to @dev. * * LOCKING: * Kernel thread context (may sleep). */ static void ata_scsi_remove_dev(struct ata_device *dev) { struct ata_port *ap = dev->link->ap; struct scsi_device *sdev; unsigned long flags; /* Alas, we need to grab scan_mutex to ensure SCSI device * state doesn't change underneath us and thus * scsi_device_get() always succeeds. The mutex locking can * be removed if there is __scsi_device_get() interface which * increments reference counts regardless of device state. */ mutex_lock(&ap->scsi_host->scan_mutex); spin_lock_irqsave(ap->lock, flags); /* clearing dev->sdev is protected by host lock */ sdev = dev->sdev; dev->sdev = NULL; if (sdev) { /* If user initiated unplug races with us, sdev can go * away underneath us after the host lock and * scan_mutex are released. Hold onto it. */ if (scsi_device_get(sdev) == 0) { /* The following ensures the attached sdev is * offline on return from ata_scsi_offline_dev() * regardless it wins or loses the race * against this function. */ scsi_device_set_state(sdev, SDEV_OFFLINE); } else { WARN_ON(1); sdev = NULL; } } spin_unlock_irqrestore(ap->lock, flags); mutex_unlock(&ap->scsi_host->scan_mutex); if (sdev) { ata_dev_info(dev, "detaching (SCSI %s)\n", dev_name(&sdev->sdev_gendev)); scsi_remove_device(sdev); scsi_device_put(sdev); } } static void ata_scsi_handle_link_detach(struct ata_link *link) { struct ata_port *ap = link->ap; struct ata_device *dev; ata_for_each_dev(dev, link, ALL) { unsigned long flags; if (!(dev->flags & ATA_DFLAG_DETACHED)) continue; spin_lock_irqsave(ap->lock, flags); dev->flags &= ~ATA_DFLAG_DETACHED; spin_unlock_irqrestore(ap->lock, flags); if (zpodd_dev_enabled(dev)) zpodd_exit(dev); ata_scsi_remove_dev(dev); } } /** * ata_scsi_media_change_notify - send media change event * @dev: Pointer to the disk device with media change event * * Tell the block layer to send a media change notification * event. * * LOCKING: * spin_lock_irqsave(host lock) */ void ata_scsi_media_change_notify(struct ata_device *dev) { if (dev->sdev) sdev_evt_send_simple(dev->sdev, SDEV_EVT_MEDIA_CHANGE, GFP_ATOMIC); } /** * ata_scsi_hotplug - SCSI part of hotplug * @work: Pointer to ATA port to perform SCSI hotplug on * * Perform SCSI part of hotplug. It's executed from a separate * workqueue after EH completes. This is necessary because SCSI * hot plugging requires working EH and hot unplugging is * synchronized with hot plugging with a mutex. * * LOCKING: * Kernel thread context (may sleep). */ void ata_scsi_hotplug(struct work_struct *work) { struct ata_port *ap = container_of(work, struct ata_port, hotplug_task.work); int i; if (ap->pflags & ATA_PFLAG_UNLOADING) { DPRINTK("ENTER/EXIT - unloading\n"); return; } DPRINTK("ENTER\n"); mutex_lock(&ap->scsi_scan_mutex); /* Unplug detached devices. We cannot use link iterator here * because PMP links have to be scanned even if PMP is * currently not attached. Iterate manually. */ ata_scsi_handle_link_detach(&ap->link); if (ap->pmp_link) for (i = 0; i < SATA_PMP_MAX_PORTS; i++) ata_scsi_handle_link_detach(&ap->pmp_link[i]); /* scan for new ones */ ata_scsi_scan_host(ap, 0); mutex_unlock(&ap->scsi_scan_mutex); DPRINTK("EXIT\n"); } /** * ata_scsi_user_scan - indication for user-initiated bus scan * @shost: SCSI host to scan * @channel: Channel to scan * @id: ID to scan * @lun: LUN to scan * * This function is called when user explicitly requests bus * scan. Set probe pending flag and invoke EH. * * LOCKING: * SCSI layer (we don't care) * * RETURNS: * Zero. */ int ata_scsi_user_scan(struct Scsi_Host *shost, unsigned int channel, unsigned int id, u64 lun) { struct ata_port *ap = ata_shost_to_port(shost); unsigned long flags; int devno, rc = 0; if (!ap->ops->error_handler) return -EOPNOTSUPP; if (lun != SCAN_WILD_CARD && lun) return -EINVAL; if (!sata_pmp_attached(ap)) { if (channel != SCAN_WILD_CARD && channel) return -EINVAL; devno = id; } else { if (id != SCAN_WILD_CARD && id) return -EINVAL; devno = channel; } spin_lock_irqsave(ap->lock, flags); if (devno == SCAN_WILD_CARD) { struct ata_link *link; ata_for_each_link(link, ap, EDGE) { struct ata_eh_info *ehi = &link->eh_info; ehi->probe_mask |= ATA_ALL_DEVICES; ehi->action |= ATA_EH_RESET; } } else { struct ata_device *dev = ata_find_dev(ap, devno); if (dev) { struct ata_eh_info *ehi = &dev->link->eh_info; ehi->probe_mask |= 1 << dev->devno; ehi->action |= ATA_EH_RESET; } else rc = -EINVAL; } if (rc == 0) { ata_port_schedule_eh(ap); spin_unlock_irqrestore(ap->lock, flags); ata_port_wait_eh(ap); } else spin_unlock_irqrestore(ap->lock, flags); return rc; } /** * ata_scsi_dev_rescan - initiate scsi_rescan_device() * @work: Pointer to ATA port to perform scsi_rescan_device() * * After ATA pass thru (SAT) commands are executed successfully, * libata need to propagate the changes to SCSI layer. * * LOCKING: * Kernel thread context (may sleep). */ void ata_scsi_dev_rescan(struct work_struct *work) { struct ata_port *ap = container_of(work, struct ata_port, scsi_rescan_task); struct ata_link *link; struct ata_device *dev; unsigned long flags; mutex_lock(&ap->scsi_scan_mutex); spin_lock_irqsave(ap->lock, flags); ata_for_each_link(link, ap, EDGE) { ata_for_each_dev(dev, link, ENABLED) { struct scsi_device *sdev = dev->sdev; if (!sdev) continue; if (scsi_device_get(sdev)) continue; spin_unlock_irqrestore(ap->lock, flags); scsi_rescan_device(&(sdev->sdev_gendev)); scsi_device_put(sdev); spin_lock_irqsave(ap->lock, flags); } } spin_unlock_irqrestore(ap->lock, flags); mutex_unlock(&ap->scsi_scan_mutex); }
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 /* SPDX-License-Identifier: GPL-2.0-or-later */ /* * Hash algorithms. * * Copyright (c) 2008 Herbert Xu <herbert@gondor.apana.org.au> */ #ifndef _CRYPTO_INTERNAL_HASH_H #define _CRYPTO_INTERNAL_HASH_H #include <crypto/algapi.h> #include <crypto/hash.h> struct ahash_request; struct scatterlist; struct crypto_hash_walk { char *data; unsigned int offset; unsigned int alignmask; struct page *pg; unsigned int entrylen; unsigned int total; struct scatterlist *sg; unsigned int flags; }; struct ahash_instance { void (*free)(struct ahash_instance *inst); union { struct { char head[offsetof(struct ahash_alg, halg.base)]; struct crypto_instance base; } s; struct ahash_alg alg; }; }; struct shash_instance { void (*free)(struct shash_instance *inst); union { struct { char head[offsetof(struct shash_alg, base)]; struct crypto_instance base; } s; struct shash_alg alg; }; }; struct crypto_ahash_spawn { struct crypto_spawn base; }; struct crypto_shash_spawn { struct crypto_spawn base; }; int crypto_hash_walk_done(struct crypto_hash_walk *walk, int err); int crypto_hash_walk_first(struct ahash_request *req, struct crypto_hash_walk *walk); static inline int crypto_hash_walk_last(struct crypto_hash_walk *walk) { return !(walk->entrylen | walk->total); } int crypto_register_ahash(struct ahash_alg *alg); void crypto_unregister_ahash(struct ahash_alg *alg); int crypto_register_ahashes(struct ahash_alg *algs, int count); void crypto_unregister_ahashes(struct ahash_alg *algs, int count); int ahash_register_instance(struct crypto_template *tmpl, struct ahash_instance *inst); bool crypto_shash_alg_has_setkey(struct shash_alg *alg); static inline bool crypto_shash_alg_needs_key(struct shash_alg *alg) { return crypto_shash_alg_has_setkey(alg) && !(alg->base.cra_flags & CRYPTO_ALG_OPTIONAL_KEY); } bool crypto_hash_alg_has_setkey(struct hash_alg_common *halg); int crypto_grab_ahash(struct crypto_ahash_spawn *spawn, struct crypto_instance *inst, const char *name, u32 type, u32 mask); static inline void crypto_drop_ahash(struct crypto_ahash_spawn *spawn) { crypto_drop_spawn(&spawn->base); } static inline struct hash_alg_common *crypto_spawn_ahash_alg( struct crypto_ahash_spawn *spawn) { return __crypto_hash_alg_common(spawn->base.alg); } int crypto_register_shash(struct shash_alg *alg); void crypto_unregister_shash(struct shash_alg *alg); int crypto_register_shashes(struct shash_alg *algs, int count); void crypto_unregister_shashes(struct shash_alg *algs, int count); int shash_register_instance(struct crypto_template *tmpl, struct shash_instance *inst); void shash_free_singlespawn_instance(struct shash_instance *inst); int crypto_grab_shash(struct crypto_shash_spawn *spawn, struct crypto_instance *inst, const char *name, u32 type, u32 mask); static inline void crypto_drop_shash(struct crypto_shash_spawn *spawn) { crypto_drop_spawn(&spawn->base); } static inline struct shash_alg *crypto_spawn_shash_alg( struct crypto_shash_spawn *spawn) { return __crypto_shash_alg(spawn->base.alg); } int shash_ahash_update(struct ahash_request *req, struct shash_desc *desc); int shash_ahash_finup(struct ahash_request *req, struct shash_desc *desc); int shash_ahash_digest(struct ahash_request *req, struct shash_desc *desc); int crypto_init_shash_ops_async(struct crypto_tfm *tfm); static inline void *crypto_ahash_ctx(struct crypto_ahash *tfm) { return crypto_tfm_ctx(crypto_ahash_tfm(tfm)); } static inline struct ahash_alg *__crypto_ahash_alg(struct crypto_alg *alg) { return container_of(__crypto_hash_alg_common(alg), struct ahash_alg, halg); } static inline void crypto_ahash_set_reqsize(struct crypto_ahash *tfm, unsigned int reqsize) { tfm->reqsize = reqsize; } static inline struct crypto_instance *ahash_crypto_instance( struct ahash_instance *inst) { return &inst->s.base; } static inline struct ahash_instance *ahash_instance( struct crypto_instance *inst) { return container_of(inst, struct ahash_instance, s.base); } static inline struct ahash_instance *ahash_alg_instance( struct crypto_ahash *ahash) { return ahash_instance(crypto_tfm_alg_instance(&ahash->base)); } static inline void *ahash_instance_ctx(struct ahash_instance *inst) { return crypto_instance_ctx(ahash_crypto_instance(inst)); } static inline void ahash_request_complete(struct ahash_request *req, int err) { req->base.complete(&req->base, err); } static inline u32 ahash_request_flags(struct ahash_request *req) { return req->base.flags; } static inline struct crypto_ahash *crypto_spawn_ahash( struct crypto_ahash_spawn *spawn) { return crypto_spawn_tfm2(&spawn->base); } static inline int ahash_enqueue_request(struct crypto_queue *queue, struct ahash_request *request) { return crypto_enqueue_request(queue, &request->base); } static inline struct ahash_request *ahash_dequeue_request( struct crypto_queue *queue) { return ahash_request_cast(crypto_dequeue_request(queue)); } static inline void *crypto_shash_ctx(struct crypto_shash *tfm) { return crypto_tfm_ctx(&tfm->base); } static inline struct crypto_instance *shash_crypto_instance( struct shash_instance *inst) { return &inst->s.base; } static inline struct shash_instance *shash_instance( struct crypto_instance *inst) { return container_of(inst, struct shash_instance, s.base); } static inline struct shash_instance *shash_alg_instance( struct crypto_shash *shash) { return shash_instance(crypto_tfm_alg_instance(&shash->base)); } static inline void *shash_instance_ctx(struct shash_instance *inst) { return crypto_instance_ctx(shash_crypto_instance(inst)); } static inline struct crypto_shash *crypto_spawn_shash( struct crypto_shash_spawn *spawn) { return crypto_spawn_tfm2(&spawn->base); } static inline void *crypto_shash_ctx_aligned(struct crypto_shash *tfm) { return crypto_tfm_ctx_aligned(&tfm->base); } static inline struct crypto_shash *__crypto_shash_cast(struct crypto_tfm *tfm) { return container_of(tfm, struct crypto_shash, base); } #endif /* _CRYPTO_INTERNAL_HASH_H */
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 /* SPDX-License-Identifier: GPL-2.0 */ /** * lib/minmax.c: windowed min/max tracker by Kathleen Nichols. * */ #ifndef MINMAX_H #define MINMAX_H #include <linux/types.h> /* A single data point for our parameterized min-max tracker */ struct minmax_sample { u32 t; /* time measurement was taken */ u32 v; /* value measured */ }; /* State for the parameterized min-max tracker */ struct minmax { struct minmax_sample s[3]; }; static inline u32 minmax_get(const struct minmax *m) { return m->s[0].v; } static inline u32 minmax_reset(struct minmax *m, u32 t, u32 meas) { struct minmax_sample val = { .t = t, .v = meas }; m->s[2] = m->s[1] = m->s[0] = val; return m->s[0].v; } u32 minmax_running_max(struct minmax *m, u32 win, u32 t, u32 meas); u32 minmax_running_min(struct minmax *m, u32 win, u32 t, u32 meas); #endif
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 /* SPDX-License-Identifier: GPL-2.0 */ #ifndef IOCONTEXT_H #define IOCONTEXT_H #include <linux/radix-tree.h> #include <linux/rcupdate.h> #include <linux/workqueue.h> enum { ICQ_EXITED = 1 << 2, ICQ_DESTROYED = 1 << 3, }; /* * An io_cq (icq) is association between an io_context (ioc) and a * request_queue (q). This is used by elevators which need to track * information per ioc - q pair. * * Elevator can request use of icq by setting elevator_type->icq_size and * ->icq_align. Both size and align must be larger than that of struct * io_cq and elevator can use the tail area for private information. The * recommended way to do this is defining a struct which contains io_cq as * the first member followed by private members and using its size and * align. For example, * * struct snail_io_cq { * struct io_cq icq; * int poke_snail; * int feed_snail; * }; * * struct elevator_type snail_elv_type { * .ops = { ... }, * .icq_size = sizeof(struct snail_io_cq), * .icq_align = __alignof__(struct snail_io_cq), * ... * }; * * If icq_size is set, block core will manage icq's. All requests will * have its ->elv.icq field set before elevator_ops->elevator_set_req_fn() * is called and be holding a reference to the associated io_context. * * Whenever a new icq is created, elevator_ops->elevator_init_icq_fn() is * called and, on destruction, ->elevator_exit_icq_fn(). Both functions * are called with both the associated io_context and queue locks held. * * Elevator is allowed to lookup icq using ioc_lookup_icq() while holding * queue lock but the returned icq is valid only until the queue lock is * released. Elevators can not and should not try to create or destroy * icq's. * * As icq's are linked from both ioc and q, the locking rules are a bit * complex. * * - ioc lock nests inside q lock. * * - ioc->icq_list and icq->ioc_node are protected by ioc lock. * q->icq_list and icq->q_node by q lock. * * - ioc->icq_tree and ioc->icq_hint are protected by ioc lock, while icq * itself is protected by q lock. However, both the indexes and icq * itself are also RCU managed and lookup can be performed holding only * the q lock. * * - icq's are not reference counted. They are destroyed when either the * ioc or q goes away. Each request with icq set holds an extra * reference to ioc to ensure it stays until the request is completed. * * - Linking and unlinking icq's are performed while holding both ioc and q * locks. Due to the lock ordering, q exit is simple but ioc exit * requires reverse-order double lock dance. */ struct io_cq { struct request_queue *q; struct io_context *ioc; /* * q_node and ioc_node link io_cq through icq_list of q and ioc * respectively. Both fields are unused once ioc_exit_icq() is * called and shared with __rcu_icq_cache and __rcu_head which are * used for RCU free of io_cq. */ union { struct list_head q_node; struct kmem_cache *__rcu_icq_cache; }; union { struct hlist_node ioc_node; struct rcu_head __rcu_head; }; unsigned int flags; }; /* * I/O subsystem state of the associated processes. It is refcounted * and kmalloc'ed. These could be shared between processes. */ struct io_context { atomic_long_t refcount; atomic_t active_ref; atomic_t nr_tasks; /* all the fields below are protected by this lock */ spinlock_t lock; unsigned short ioprio; struct radix_tree_root icq_tree; struct io_cq __rcu *icq_hint; struct hlist_head icq_list; struct work_struct release_work; }; /** * get_io_context_active - get active reference on ioc * @ioc: ioc of interest * * Only iocs with active reference can issue new IOs. This function * acquires an active reference on @ioc. The caller must already have an * active reference on @ioc. */ static inline void get_io_context_active(struct io_context *ioc) { WARN_ON_ONCE(atomic_long_read(&ioc->refcount) <= 0); WARN_ON_ONCE(atomic_read(&ioc->active_ref) <= 0); atomic_long_inc(&ioc->refcount); atomic_inc(&ioc->active_ref); } static inline void ioc_task_link(struct io_context *ioc) { get_io_context_active(ioc); WARN_ON_ONCE(atomic_read(&ioc->nr_tasks) <= 0); atomic_inc(&ioc->nr_tasks); } struct task_struct; #ifdef CONFIG_BLOCK void put_io_context(struct io_context *ioc); void put_io_context_active(struct io_context *ioc); void exit_io_context(struct task_struct *task); struct io_context *get_task_io_context(struct task_struct *task, gfp_t gfp_flags, int node); #else struct io_context; static inline void put_io_context(struct io_context *ioc) { } static inline void exit_io_context(struct task_struct *task) { } #endif #endif
1 2 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 // SPDX-License-Identifier: GPL-2.0 /* * linux/fs/stat.c * * Copyright (C) 1991, 1992 Linus Torvalds */ #include <linux/export.h> #include <linux/mm.h> #include <linux/errno.h> #include <linux/file.h> #include <linux/highuid.h> #include <linux/fs.h> #include <linux/namei.h> #include <linux/security.h> #include <linux/cred.h> #include <linux/syscalls.h> #include <linux/pagemap.h> #include <linux/compat.h> #include <linux/uaccess.h> #include <asm/unistd.h> #include "internal.h" #include "mount.h" /** * generic_fillattr - Fill in the basic attributes from the inode struct * @inode: Inode to use as the source * @stat: Where to fill in the attributes * * Fill in the basic attributes in the kstat structure from data that's to be * found on the VFS inode structure. This is the default if no getattr inode * operation is supplied. */ void generic_fillattr(struct inode *inode, struct kstat *stat) { stat->dev = inode->i_sb->s_dev; stat->ino = inode->i_ino; stat->mode = inode->i_mode; stat->nlink = inode->i_nlink; stat->uid = inode->i_uid; stat->gid = inode->i_gid; stat->rdev = inode->i_rdev; stat->size = i_size_read(inode); stat->atime = inode->i_atime; stat->mtime = inode->i_mtime; stat->ctime = inode->i_ctime; stat->blksize = i_blocksize(inode); stat->blocks = inode->i_blocks; } EXPORT_SYMBOL(generic_fillattr); /** * vfs_getattr_nosec - getattr without security checks * @path: file to get attributes from * @stat: structure to return attributes in * @request_mask: STATX_xxx flags indicating what the caller wants * @query_flags: Query mode (AT_STATX_SYNC_TYPE) * * Get attributes without calling security_inode_getattr. * * Currently the only caller other than vfs_getattr is internal to the * filehandle lookup code, which uses only the inode number and returns no * attributes to any user. Any other code probably wants vfs_getattr. */ int vfs_getattr_nosec(const struct path *path, struct kstat *stat, u32 request_mask, unsigned int query_flags) { struct inode *inode = d_backing_inode(path->dentry); memset(stat, 0, sizeof(*stat)); stat->result_mask |= STATX_BASIC_STATS; query_flags &= AT_STATX_SYNC_TYPE; /* allow the fs to override these if it really wants to */ /* SB_NOATIME means filesystem supplies dummy atime value */ if (inode->i_sb->s_flags & SB_NOATIME) stat->result_mask &= ~STATX_ATIME; /* * Note: If you add another clause to set an attribute flag, please * update attributes_mask below. */ if (IS_AUTOMOUNT(inode)) stat->attributes |= STATX_ATTR_AUTOMOUNT; if (IS_DAX(inode)) stat->attributes |= STATX_ATTR_DAX; stat->attributes_mask |= (STATX_ATTR_AUTOMOUNT | STATX_ATTR_DAX); if (inode->i_op->getattr) return inode->i_op->getattr(path, stat, request_mask, query_flags); generic_fillattr(inode, stat); return 0; } EXPORT_SYMBOL(vfs_getattr_nosec); /* * vfs_getattr - Get the enhanced basic attributes of a file * @path: The file of interest * @stat: Where to return the statistics * @request_mask: STATX_xxx flags indicating what the caller wants * @query_flags: Query mode (AT_STATX_SYNC_TYPE) * * Ask the filesystem for a file's attributes. The caller must indicate in * request_mask and query_flags to indicate what they want. * * If the file is remote, the filesystem can be forced to update the attributes * from the backing store by passing AT_STATX_FORCE_SYNC in query_flags or can * suppress the update by passing AT_STATX_DONT_SYNC. * * Bits must have been set in request_mask to indicate which attributes the * caller wants retrieving. Any such attribute not requested may be returned * anyway, but the value may be approximate, and, if remote, may not have been * synchronised with the server. * * 0 will be returned on success, and a -ve error code if unsuccessful. */ int vfs_getattr(const struct path *path, struct kstat *stat, u32 request_mask, unsigned int query_flags) { int retval; retval = security_inode_getattr(path); if (retval) return retval; return vfs_getattr_nosec(path, stat, request_mask, query_flags); } EXPORT_SYMBOL(vfs_getattr); /** * vfs_fstat - Get the basic attributes by file descriptor * @fd: The file descriptor referring to the file of interest * @stat: The result structure to fill in. * * This function is a wrapper around vfs_getattr(). The main difference is * that it uses a file descriptor to determine the file location. * * 0 will be returned on success, and a -ve error code if unsuccessful. */ int vfs_fstat(int fd, struct kstat *stat) { struct fd f; int error; f = fdget_raw(fd); if (!f.file) return -EBADF; error = vfs_getattr(&f.file->f_path, stat, STATX_BASIC_STATS, 0); fdput(f); return error; } /** * vfs_statx - Get basic and extra attributes by filename * @dfd: A file descriptor representing the base dir for a relative filename * @filename: The name of the file of interest * @flags: Flags to control the query * @stat: The result structure to fill in. * @request_mask: STATX_xxx flags indicating what the caller wants * * This function is a wrapper around vfs_getattr(). The main difference is * that it uses a filename and base directory to determine the file location. * Additionally, the use of AT_SYMLINK_NOFOLLOW in flags will prevent a symlink * at the given name from being referenced. * * 0 will be returned on success, and a -ve error code if unsuccessful. */ static int vfs_statx(int dfd, const char __user *filename, int flags, struct kstat *stat, u32 request_mask) { struct path path; unsigned lookup_flags = 0; int error; if (flags & ~(AT_SYMLINK_NOFOLLOW | AT_NO_AUTOMOUNT | AT_EMPTY_PATH | AT_STATX_SYNC_TYPE)) return -EINVAL; if (!(flags & AT_SYMLINK_NOFOLLOW)) lookup_flags |= LOOKUP_FOLLOW; if (!(flags & AT_NO_AUTOMOUNT)) lookup_flags |= LOOKUP_AUTOMOUNT; if (flags & AT_EMPTY_PATH) lookup_flags |= LOOKUP_EMPTY; retry: error = user_path_at(dfd, filename, lookup_flags, &path); if (error) goto out; error = vfs_getattr(&path, stat, request_mask, flags); stat->mnt_id = real_mount(path.mnt)->mnt_id; stat->result_mask |= STATX_MNT_ID; if (path.mnt->mnt_root == path.dentry) stat->attributes |= STATX_ATTR_MOUNT_ROOT; stat->attributes_mask |= STATX_ATTR_MOUNT_ROOT; path_put(&path); if (retry_estale(error, lookup_flags)) { lookup_flags |= LOOKUP_REVAL; goto retry; } out: return error; } int vfs_fstatat(int dfd, const char __user *filename, struct kstat *stat, int flags) { return vfs_statx(dfd, filename, flags | AT_NO_AUTOMOUNT, stat, STATX_BASIC_STATS); } #ifdef __ARCH_WANT_OLD_STAT /* * For backward compatibility? Maybe this should be moved * into arch/i386 instead? */ static int cp_old_stat(struct kstat *stat, struct __old_kernel_stat __user * statbuf) { static int warncount = 5; struct __old_kernel_stat tmp; if (warncount > 0) { warncount--; printk(KERN_WARNING "VFS: Warning: %s using old stat() call. Recompile your binary.\n", current->comm); } else if (warncount < 0) { /* it's laughable, but... */ warncount = 0; } memset(&tmp, 0, sizeof(struct __old_kernel_stat)); tmp.st_dev = old_encode_dev(stat->dev); tmp.st_ino = stat->ino; if (sizeof(tmp.st_ino) < sizeof(stat->ino) && tmp.st_ino != stat->ino) return -EOVERFLOW; tmp.st_mode = stat->mode; tmp.st_nlink = stat->nlink; if (tmp.st_nlink != stat->nlink) return -EOVERFLOW; SET_UID(tmp.st_uid, from_kuid_munged(current_user_ns(), stat->uid)); SET_GID(tmp.st_gid, from_kgid_munged(current_user_ns(), stat->gid)); tmp.st_rdev = old_encode_dev(stat->rdev); #if BITS_PER_LONG == 32 if (stat->size > MAX_NON_LFS) return -EOVERFLOW; #endif tmp.st_size = stat->size; tmp.st_atime = stat->atime.tv_sec; tmp.st_mtime = stat->mtime.tv_sec; tmp.st_ctime = stat->ctime.tv_sec; return copy_to_user(statbuf,&tmp,sizeof(tmp)) ? -EFAULT : 0; } SYSCALL_DEFINE2(stat, const char __user *, filename, struct __old_kernel_stat __user *, statbuf) { struct kstat stat; int error; error = vfs_stat(filename, &stat); if (error) return error; return cp_old_stat(&stat, statbuf); } SYSCALL_DEFINE2(lstat, const char __user *, filename, struct __old_kernel_stat __user *, statbuf) { struct kstat stat; int error; error = vfs_lstat(filename, &stat); if (error) return error; return cp_old_stat(&stat, statbuf); } SYSCALL_DEFINE2(fstat, unsigned int, fd, struct __old_kernel_stat __user *, statbuf) { struct kstat stat; int error = vfs_fstat(fd, &stat); if (!error) error = cp_old_stat(&stat, statbuf); return error; } #endif /* __ARCH_WANT_OLD_STAT */ #ifdef __ARCH_WANT_NEW_STAT #if BITS_PER_LONG == 32 # define choose_32_64(a,b) a #else # define choose_32_64(a,b) b #endif #define valid_dev(x) choose_32_64(old_valid_dev(x),true) #define encode_dev(x) choose_32_64(old_encode_dev,new_encode_dev)(x) #ifndef INIT_STRUCT_STAT_PADDING # define INIT_STRUCT_STAT_PADDING(st) memset(&st, 0, sizeof(st)) #endif static int cp_new_stat(struct kstat *stat, struct stat __user *statbuf) { struct stat tmp; if (!valid_dev(stat->dev) || !valid_dev(stat->rdev)) return -EOVERFLOW; #if BITS_PER_LONG == 32 if (stat->size > MAX_NON_LFS) return -EOVERFLOW; #endif INIT_STRUCT_STAT_PADDING(tmp); tmp.st_dev = encode_dev(stat->dev); tmp.st_ino = stat->ino; if (sizeof(tmp.st_ino) < sizeof(stat->ino) && tmp.st_ino != stat->ino) return -EOVERFLOW; tmp.st_mode = stat->mode; tmp.st_nlink = stat->nlink; if (tmp.st_nlink != stat->nlink) return -EOVERFLOW; SET_UID(tmp.st_uid, from_kuid_munged(current_user_ns(), stat->uid)); SET_GID(tmp.st_gid, from_kgid_munged(current_user_ns(), stat->gid)); tmp.st_rdev = encode_dev(stat->rdev); tmp.st_size = stat->size; tmp.st_atime = stat->atime.tv_sec; tmp.st_mtime = stat->mtime.tv_sec; tmp.st_ctime = stat->ctime.tv_sec; #ifdef STAT_HAVE_NSEC tmp.st_atime_nsec = stat->atime.tv_nsec; tmp.st_mtime_nsec = stat->mtime.tv_nsec; tmp.st_ctime_nsec = stat->ctime.tv_nsec; #endif tmp.st_blocks = stat->blocks; tmp.st_blksize = stat->blksize; return copy_to_user(statbuf,&tmp,sizeof(tmp)) ? -EFAULT : 0; } SYSCALL_DEFINE2(newstat, const char __user *, filename, struct stat __user *, statbuf) { struct kstat stat; int error = vfs_stat(filename, &stat); if (error) return error; return cp_new_stat(&stat, statbuf); } SYSCALL_DEFINE2(newlstat, const char __user *, filename, struct stat __user *, statbuf) { struct kstat stat; int error; error = vfs_lstat(filename, &stat); if (error) return error; return cp_new_stat(&stat, statbuf); } #if !defined(__ARCH_WANT_STAT64) || defined(__ARCH_WANT_SYS_NEWFSTATAT) SYSCALL_DEFINE4(newfstatat, int, dfd, const char __user *, filename, struct stat __user *, statbuf, int, flag) { struct kstat stat; int error; error = vfs_fstatat(dfd, filename, &stat, flag); if (error) return error; return cp_new_stat(&stat, statbuf); } #endif SYSCALL_DEFINE2(newfstat, unsigned int, fd, struct stat __user *, statbuf) { struct kstat stat; int error = vfs_fstat(fd, &stat); if (!error) error = cp_new_stat(&stat, statbuf); return error; } #endif static int do_readlinkat(int dfd, const char __user *pathname, char __user *buf, int bufsiz) { struct path path; int error; int empty = 0; unsigned int lookup_flags = LOOKUP_EMPTY; if (bufsiz <= 0) return -EINVAL; retry: error = user_path_at_empty(dfd, pathname, lookup_flags, &path, &empty); if (!error) { struct inode *inode = d_backing_inode(path.dentry); error = empty ? -ENOENT : -EINVAL; /* * AFS mountpoints allow readlink(2) but are not symlinks */ if (d_is_symlink(path.dentry) || inode->i_op->readlink) { error = security_inode_readlink(path.dentry); if (!error) { touch_atime(&path); error = vfs_readlink(path.dentry, buf, bufsiz); } } path_put(&path); if (retry_estale(error, lookup_flags)) { lookup_flags |= LOOKUP_REVAL; goto retry; } } return error; } SYSCALL_DEFINE4(readlinkat, int, dfd, const char __user *, pathname, char __user *, buf, int, bufsiz) { return do_readlinkat(dfd, pathname, buf, bufsiz); } SYSCALL_DEFINE3(readlink, const char __user *, path, char __user *, buf, int, bufsiz) { return do_readlinkat(AT_FDCWD, path, buf, bufsiz); } /* ---------- LFS-64 ----------- */ #if defined(__ARCH_WANT_STAT64) || defined(__ARCH_WANT_COMPAT_STAT64) #ifndef INIT_STRUCT_STAT64_PADDING # define INIT_STRUCT_STAT64_PADDING(st) memset(&st, 0, sizeof(st)) #endif static long cp_new_stat64(struct kstat *stat, struct stat64 __user *statbuf) { struct stat64 tmp; INIT_STRUCT_STAT64_PADDING(tmp); #ifdef CONFIG_MIPS /* mips has weird padding, so we don't get 64 bits there */ tmp.st_dev = new_encode_dev(stat->dev); tmp.st_rdev = new_encode_dev(stat->rdev); #else tmp.st_dev = huge_encode_dev(stat->dev); tmp.st_rdev = huge_encode_dev(stat->rdev); #endif tmp.st_ino = stat->ino; if (sizeof(tmp.st_ino) < sizeof(stat->ino) && tmp.st_ino != stat->ino) return -EOVERFLOW; #ifdef STAT64_HAS_BROKEN_ST_INO tmp.__st_ino = stat->ino; #endif tmp.st_mode = stat->mode; tmp.st_nlink = stat->nlink; tmp.st_uid = from_kuid_munged(current_user_ns(), stat->uid); tmp.st_gid = from_kgid_munged(current_user_ns(), stat->gid); tmp.st_atime = stat->atime.tv_sec; tmp.st_atime_nsec = stat->atime.tv_nsec; tmp.st_mtime = stat->mtime.tv_sec; tmp.st_mtime_nsec = stat->mtime.tv_nsec; tmp.st_ctime = stat->ctime.tv_sec; tmp.st_ctime_nsec = stat->ctime.tv_nsec; tmp.st_size = stat->size; tmp.st_blocks = stat->blocks; tmp.st_blksize = stat->blksize; return copy_to_user(statbuf,&tmp,sizeof(tmp)) ? -EFAULT : 0; } SYSCALL_DEFINE2(stat64, const char __user *, filename, struct stat64 __user *, statbuf) { struct kstat stat; int error = vfs_stat(filename, &stat); if (!error) error = cp_new_stat64(&stat, statbuf); return error; } SYSCALL_DEFINE2(lstat64, const char __user *, filename, struct stat64 __user *, statbuf) { struct kstat stat; int error = vfs_lstat(filename, &stat); if (!error) error = cp_new_stat64(&stat, statbuf); return error; } SYSCALL_DEFINE2(fstat64, unsigned long, fd, struct stat64 __user *, statbuf) { struct kstat stat; int error = vfs_fstat(fd, &stat); if (!error) error = cp_new_stat64(&stat, statbuf); return error; } SYSCALL_DEFINE4(fstatat64, int, dfd, const char __user *, filename, struct stat64 __user *, statbuf, int, flag) { struct kstat stat; int error; error = vfs_fstatat(dfd, filename, &stat, flag); if (error) return error; return cp_new_stat64(&stat, statbuf); } #endif /* __ARCH_WANT_STAT64 || __ARCH_WANT_COMPAT_STAT64 */ static noinline_for_stack int cp_statx(const struct kstat *stat, struct statx __user *buffer) { struct statx tmp; memset(&tmp, 0, sizeof(tmp)); tmp.stx_mask = stat->result_mask; tmp.stx_blksize = stat->blksize; tmp.stx_attributes = stat->attributes; tmp.stx_nlink = stat->nlink; tmp.stx_uid = from_kuid_munged(current_user_ns(), stat->uid); tmp.stx_gid = from_kgid_munged(current_user_ns(), stat->gid); tmp.stx_mode = stat->mode; tmp.stx_ino = stat->ino; tmp.stx_size = stat->size; tmp.stx_blocks = stat->blocks; tmp.stx_attributes_mask = stat->attributes_mask; tmp.stx_atime.tv_sec = stat->atime.tv_sec; tmp.stx_atime.tv_nsec = stat->atime.tv_nsec; tmp.stx_btime.tv_sec = stat->btime.tv_sec; tmp.stx_btime.tv_nsec = stat->btime.tv_nsec; tmp.stx_ctime.tv_sec = stat->ctime.tv_sec; tmp.stx_ctime.tv_nsec = stat->ctime.tv_nsec; tmp.stx_mtime.tv_sec = stat->mtime.tv_sec; tmp.stx_mtime.tv_nsec = stat->mtime.tv_nsec; tmp.stx_rdev_major = MAJOR(stat->rdev); tmp.stx_rdev_minor = MINOR(stat->rdev); tmp.stx_dev_major = MAJOR(stat->dev); tmp.stx_dev_minor = MINOR(stat->dev); tmp.stx_mnt_id = stat->mnt_id; return copy_to_user(buffer, &tmp, sizeof(tmp)) ? -EFAULT : 0; } int do_statx(int dfd, const char __user *filename, unsigned flags, unsigned int mask, struct statx __user *buffer) { struct kstat stat; int error; if (mask & STATX__RESERVED) return -EINVAL; if ((flags & AT_STATX_SYNC_TYPE) == AT_STATX_SYNC_TYPE) return -EINVAL; error = vfs_statx(dfd, filename, flags, &stat, mask); if (error) return error; return cp_statx(&stat, buffer); } /** * sys_statx - System call to get enhanced stats * @dfd: Base directory to pathwalk from *or* fd to stat. * @filename: File to stat or "" with AT_EMPTY_PATH * @flags: AT_* flags to control pathwalk. * @mask: Parts of statx struct actually required. * @buffer: Result buffer. * * Note that fstat() can be emulated by setting dfd to the fd of interest, * supplying "" as the filename and setting AT_EMPTY_PATH in the flags. */ SYSCALL_DEFINE5(statx, int, dfd, const char __user *, filename, unsigned, flags, unsigned int, mask, struct statx __user *, buffer) { return do_statx(dfd, filename, flags, mask, buffer); } #ifdef CONFIG_COMPAT static int cp_compat_stat(struct kstat *stat, struct compat_stat __user *ubuf) { struct compat_stat tmp; if (!old_valid_dev(stat->dev) || !old_valid_dev(stat->rdev)) return -EOVERFLOW; memset(&tmp, 0, sizeof(tmp)); tmp.st_dev = old_encode_dev(stat->dev); tmp.st_ino = stat->ino; if (sizeof(tmp.st_ino) < sizeof(stat->ino) && tmp.st_ino != stat->ino) return -EOVERFLOW; tmp.st_mode = stat->mode; tmp.st_nlink = stat->nlink; if (tmp.st_nlink != stat->nlink) return -EOVERFLOW; SET_UID(tmp.st_uid, from_kuid_munged(current_user_ns(), stat->uid)); SET_GID(tmp.st_gid, from_kgid_munged(current_user_ns(), stat->gid)); tmp.st_rdev = old_encode_dev(stat->rdev); if ((u64) stat->size > MAX_NON_LFS) return -EOVERFLOW; tmp.st_size = stat->size; tmp.st_atime = stat->atime.tv_sec; tmp.st_atime_nsec = stat->atime.tv_nsec; tmp.st_mtime = stat->mtime.tv_sec; tmp.st_mtime_nsec = stat->mtime.tv_nsec; tmp.st_ctime = stat->ctime.tv_sec; tmp.st_ctime_nsec = stat->ctime.tv_nsec; tmp.st_blocks = stat->blocks; tmp.st_blksize = stat->blksize; return copy_to_user(ubuf, &tmp, sizeof(tmp)) ? -EFAULT : 0; } COMPAT_SYSCALL_DEFINE2(newstat, const char __user *, filename, struct compat_stat __user *, statbuf) { struct kstat stat; int error; error = vfs_stat(filename, &stat); if (error) return error; return cp_compat_stat(&stat, statbuf); } COMPAT_SYSCALL_DEFINE2(newlstat, const char __user *, filename, struct compat_stat __user *, statbuf) { struct kstat stat; int error; error = vfs_lstat(filename, &stat); if (error) return error; return cp_compat_stat(&stat, statbuf); } #ifndef __ARCH_WANT_STAT64 COMPAT_SYSCALL_DEFINE4(newfstatat, unsigned int, dfd, const char __user *, filename, struct compat_stat __user *, statbuf, int, flag) { struct kstat stat; int error; error = vfs_fstatat(dfd, filename, &stat, flag); if (error) return error; return cp_compat_stat(&stat, statbuf); } #endif COMPAT_SYSCALL_DEFINE2(newfstat, unsigned int, fd, struct compat_stat __user *, statbuf) { struct kstat stat; int error = vfs_fstat(fd, &stat); if (!error) error = cp_compat_stat(&stat, statbuf); return error; } #endif /* Caller is here responsible for sufficient locking (ie. inode->i_lock) */ void __inode_add_bytes(struct inode *inode, loff_t bytes) { inode->i_blocks += bytes >> 9; bytes &= 511; inode->i_bytes += bytes; if (inode->i_bytes >= 512) { inode->i_blocks++; inode->i_bytes -= 512; } } EXPORT_SYMBOL(__inode_add_bytes); void inode_add_bytes(struct inode *inode, loff_t bytes) { spin_lock(&inode->i_lock); __inode_add_bytes(inode, bytes); spin_unlock(&inode->i_lock); } EXPORT_SYMBOL(inode_add_bytes); void __inode_sub_bytes(struct inode *inode, loff_t bytes) { inode->i_blocks -= bytes >> 9; bytes &= 511; if (inode->i_bytes < bytes) { inode->i_blocks--; inode->i_bytes += 512; } inode->i_bytes -= bytes; } EXPORT_SYMBOL(__inode_sub_bytes); void inode_sub_bytes(struct inode *inode, loff_t bytes) { spin_lock(&inode->i_lock); __inode_sub_bytes(inode, bytes); spin_unlock(&inode->i_lock); } EXPORT_SYMBOL(inode_sub_bytes); loff_t inode_get_bytes(struct inode *inode) { loff_t ret; spin_lock(&inode->i_lock); ret = __inode_get_bytes(inode); spin_unlock(&inode->i_lock); return ret; } EXPORT_SYMBOL(inode_get_bytes); void inode_set_bytes(struct inode *inode, loff_t bytes) { /* Caller is here responsible for sufficient locking * (ie. inode->i_lock) */ inode->i_blocks = bytes >> 9; inode->i_bytes = bytes & 511; } EXPORT_SYMBOL(inode_set_bytes);
1 1 1 1 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946 947 948 949 950 951 952 953 954 955 956 957 958 959 960 961 962 963 964 965 966 967 968 969 970 971 972 973 974 975 976 977 978 979 980 981 982 983 984 985 986 987 988 989 990 991 992 993 994 995 996 997 998 999 1000 1001 1002 1003 1004 1005 1006 1007 1008 1009 1010 1011 1012 1013 1014 1015 1016 1017 1018 1019 1020 1021 1022 1023 1024 1025 1026 1027 1028 1029 1030 1031 1032 1033 1034 1035 1036 1037 1038 1039 1040 1041 1042 1043 1044 1045 1046 1047 1048 1049 1050 1051 1052 1053 1054 1055 1056 1057 1058 1059 1060 1061 1062 1063 1064 1065 1066 1067 1068 1069 1070 1071 1072 1073 1074 1075 1076 1077 1078 1079 1080 1081 1082 1083 1084 1085 1086 1087 1088 1089 1090 1091 1092 1093 1094 1095 1096 1097 1098 1099 1100 1101 1102 1103 1104 1105 1106 1107 1108 1109 1110 1111 1112 1113 1114 1115 1116 1117 1118 1119 1120 1121 1122 1123 1124 1125 1126 1127 1128 1129 1130 1131 1132 1133 1134 1135 1136 1137 1138 1139 1140 1141 1142 1143 1144 1145 1146 1147 1148 1149 1150 1151 1152 1153 1154 1155 1156 1157 1158 1159 1160 1161 1162 1163 1164 1165 1166 1167 1168 1169 1170 1171 1172 1173 1174 1175 1176 1177 1178 1179 1180 1181 1182 1183 1184 1185 1186 1187 1188 1189 1190 1191 1192 1193 1194 1195 1196 1197 1198 1199 1200 1201 1202 1203 1204 1205 1206 1207 1208 1209 1210 1211 1212 1213 1214 1215 1216 1217 1218 1219 1220 1221 1222 1223 1224 1225 1226 1227 1228 1229 1230 1231 1232 1233 1234 1235 1236 1237 1238 1239 1240 1241 1242 1243 1244 1245 1246 1247 1248 1249 1250 1251 1252 1253 1254 1255 1256 1257 1258 1259 1260 1261 1262 1263 1264 1265 1266 1267 1268 1269 1270 1271 1272 1273 1274 1275 1276 1277 1278 1279 1280 1281 1282 1283 1284 1285 1286 1287 1288 1289 1290 1291 1292 1293 1294 1295 1296 1297 1298 1299 1300 1301 1302 1303 1304 1305 1306 1307 1308 1309 1310 1311 1312 1313 1314 1315 1316 1317 1318 1319 1320 1321 1322 1323 1324 1325 1326 1327 1328 1329 1330 1331 1332 1333 1334 1335 1336 1337 1338 1339 1340 1341 1342 1343 1344 1345 1346 1347 1348 1349 1350 1351 1352 1353 1354 1355 1356 1357 1358 1359 1360 1361 1362 1363 1364 1365 1366 1367 1368 1369 1370 1371 1372 1373 1374 1375 1376 1377 1378 1379 1380 1381 1382 1383 1384 1385 1386 1387 1388 1389 1390 1391 1392 1393 1394 1395 1396 1397 1398 1399 1400 1401 1402 1403 1404 1405 1406 1407 1408 1409 1410 1411 1412 1413 1414 1415 1416 1417 1418 1419 1420 1421 1422 1423 1424 1425 1426 1427 1428 1429 1430 1431 1432 1433 1434 1435 1436 1437 1438 1439 1440 1441 1442 1443 1444 1445 1446 1447 1448 1449 1450 1451 1452 1453 1454 1455 1456 1457 1458 1459 1460 1461 1462 1463 1464 1465 1466 1467 1468 1469 1470 1471 1472 1473 1474 1475 1476 1477 1478 1479 1480 1481 1482 1483 1484 1485 1486 1487 1488 1489 1490 1491 1492 1493 1494 1495 1496 1497 1498 1499 1500 1501 1502 1503 1504 1505 1506 1507 1508 1509 1510 1511 1512 1513 1514 1515 1516 1517 1518 1519 1520 1521 1522 1523 1524 1525 1526 1527 1528 1529 1530 1531 1532 1533 1534 1535 1536 1537 1538 1539 1540 1541 1542 1543 1544 1545 1546 1547 1548 1549 1550 1551 1552 1553 1554 1555 1556 1557 1558 1559 1560 1561 1562 1563 1564 1565 1566 1567 1568 1569 1570 1571 1572 1573 1574 1575 1576 1577 1578 1579 1580 1581 1582 1583 1584 1585 1586 1587 1588 1589 1590 1591 1592 1593 1594 1595 1596 1597 1598 1599 1600 1601 1602 1603 1604 1605 1606 1607 1608 1609 1610 1611 1612 1613 1614 1615 1616 1617 1618 1619 1620 1621 1622 1623 1624 1625 1626 1627 1628 1629 1630 1631 1632 1633 1634 1635 1636 1637 1638 1639 1640 1641 1642 1643 1644 1645 1646 1647 1648 1649 1650 1651 1652 1653 1654 1655 1656 1657 1658 1659 1660 1661 1662 1663 1664 1665 1666 1667 1668 1669 1670 1671 1672 1673 1674 1675 1676 1677 1678 1679 1680 1681 1682 1683 1684 1685 1686 1687 1688 1689 1690 1691 1692 1693 1694 1695 1696 1697 1698 1699 1700 1701 1702 1703 1704 1705 1706 1707 1708 1709 1710 1711 1712 1713 1714 1715 1716 1717 1718 1719 1720 1721 1722 1723 1724 1725 1726 1727 1728 1729 1730 1731 1732 1733 1734 1735 1736 1737 1738 1739 1740 1741 1742 1743 1744 1745 1746 1747 1748 1749 1750 1751 1752 1753 1754 1755 1756 1757 1758 1759 1760 1761 1762 1763 1764 1765 1766 1767 1768 1769 1770 1771 1772 1773 1774 1775 1776 1777 1778 1779 1780 1781 1782 1783 1784 1785 1786 1787 1788 1789 1790 1791 1792 1793 1794 1795 1796 1797 1798 1799 1800 1801 1802 1803 1804 1805 1806 1807 1808 1809 1810 1811 1812 1813 1814 1815 1816 1817 1818 1819 1820 1821 1822 1823 1824 1825 1826 1827 1828 1829 1830 1831 1832 1833 1834 1835 1836 1837 1838 1839 1840 1841 1842 1843 1844 1845 1846 1847 1848 1849 1850 1851 1852 1853 1854 1855 1856 1857 1858 1859 1860 1861 1862 1863 1864 1865 1866 1867 1868 1869 1870 1871 1872 1873 1874 1875 1876 1877 1878 1879 1880 1881 1882 1883 1884 1885 1886 1887 1888 1889 1890 1891 1892 1893 1894 1895 1896 1897 1898 1899 1900 1901 1902 1903 1904 1905 1906 1907 1908 1909 1910 1911 1912 1913 1914 1915 1916 1917 1918 1919 1920 1921 1922 1923 1924 1925 1926 1927 1928 1929 1930 1931 1932 1933 1934 1935 1936 1937 1938 1939 1940 1941 1942 1943 1944 1945 1946 1947 1948 1949 1950 1951 1952 1953 1954 1955 1956 1957 1958 1959 1960 1961 1962 1963 1964 1965 1966 1967 1968 1969 1970 1971 1972 1973 1974 1975 1976 1977 1978 1979 1980 1981 1982 1983 1984 1985 1986 1987 1988 1989 1990 1991 1992 1993 1994 1995 1996 1997 1998 1999 2000 2001 2002 2003 2004 2005 2006 2007 2008 2009 2010 2011 2012 2013 2014 2015 2016 2017 2018 2019 2020 2021 2022 2023 2024 2025 2026 2027 2028 2029 2030 2031 2032 2033 2034 2035 2036 2037 2038 2039 2040 2041 2042 2043 2044 2045 2046 2047 2048 2049 2050 2051 2052 2053 2054 2055 2056 2057 2058 2059 2060 2061 2062 2063 2064 2065 2066 2067 2068 2069 2070 2071 2072 2073 2074 2075 2076 2077 2078 2079 2080 2081 2082 2083 2084 2085 2086 2087 2088 2089 2090 2091 2092 2093 2094 2095 2096 2097 2098 2099 2100 2101 2102 2103 2104 2105 2106 2107 2108 2109 2110 2111 2112 2113 2114 2115 2116 2117 2118 2119 2120 2121 2122 2123 2124 2125 2126 2127 2128 2129 2130 2131 2132 2133 2134 2135 2136 2137 2138 2139 2140 2141 2142 2143 2144 2145 2146 2147 2148 2149 2150 2151 2152 2153 2154 2155 2156 2157 2158 2159 2160 2161 2162 2163 2164 2165 2166 2167 2168 2169 2170 2171 2172 2173 2174 2175 2176 2177 2178 2179 2180 2181 2182 2183 2184 2185 2186 2187 2188 2189 2190 2191 2192 2193 2194 2195 2196 2197 2198 2199 2200 2201 2202 2203 2204 2205 2206 2207 2208 2209 2210 2211 2212 2213 2214 2215 2216 2217 2218 2219 2220 2221 2222 2223 2224 2225 2226 2227 2228 2229 2230 2231 2232 2233 2234 2235 2236 2237 2238 2239 2240 2241 2242 2243 2244 2245 2246 2247 2248 2249 2250 2251 2252 2253 2254 2255 2256 2257 2258 2259 2260 2261 2262 2263 2264 2265 2266 2267 2268 2269 2270 2271 2272 2273 2274 2275 2276 2277 2278 2279 2280 2281 2282 2283 2284 2285 2286 2287 2288 2289 2290 2291 2292 2293 2294 2295 2296 2297 2298 2299 2300 2301 2302 2303 2304 2305 2306 2307 2308 2309 2310 2311 2312 2313 2314 2315 2316 2317 2318 2319 2320 2321 2322 2323 2324 2325 2326 2327 2328 2329 2330 2331 2332 2333 2334 2335 2336 2337 2338 2339 2340 2341 2342 2343 2344 2345 2346 2347 2348 2349 2350 2351 2352 2353 2354 2355 2356 2357 2358 2359 2360 2361 2362 2363 2364 2365 2366 2367 2368 2369 2370 2371 2372 2373 2374 2375 2376 2377 2378 2379 2380 2381 2382 2383 2384 2385 2386 2387 2388 2389 2390 2391 2392 2393 2394 2395 2396 2397 2398 2399 2400 2401 2402 2403 2404 2405 2406 2407 2408 2409 2410 2411 2412 2413 2414 2415 2416 2417 2418 2419 2420 2421 2422 2423 2424 2425 2426 2427 2428 2429 2430 2431 2432 2433 2434 2435 2436 2437 2438 2439 2440 2441 2442 2443 2444 2445 2446 2447 2448 2449 2450 2451 2452 2453 2454 2455 2456 2457 2458 2459 2460 2461 2462 2463 2464 2465 2466 2467 2468 2469 2470 2471 2472 2473 2474 2475 2476 2477 2478 2479 2480 2481 2482 2483 2484 2485 2486 2487 2488 2489 2490 2491 2492 2493 2494 2495 2496 2497 2498 2499 2500 2501 2502 2503 2504 2505 2506 2507 2508 2509 2510 2511 2512 2513 2514 2515 2516 2517 2518 2519 2520 2521 2522 2523 2524 2525 2526 2527 2528 2529 2530 2531 2532 2533 2534 2535 2536 2537 2538 2539 2540 2541 2542 2543 2544 2545 2546 2547 2548 2549 2550 2551 2552 2553 2554 2555 2556 2557 2558 2559 2560 2561 2562 2563 2564 2565 2566 2567 2568 2569 2570 2571 2572 2573 2574 2575 2576 2577 2578 2579 2580 2581 2582 2583 2584 2585 2586 2587 2588 2589 2590 2591 2592 2593 2594 2595 2596 2597 2598 2599 2600 2601 2602 2603 2604 2605 2606 2607 2608 2609 2610 2611 2612 2613 2614 2615 2616 2617 2618 2619 2620 2621 2622 2623 2624 2625 2626 2627 2628 2629 2630 2631 2632 2633 2634 2635 2636 2637 2638 2639 2640 2641 2642 2643 2644 2645 2646 2647 2648 2649 2650 2651 2652 2653 2654 2655 2656 2657 2658 2659 2660 2661 2662 2663 2664 2665 2666 2667 2668 2669 2670 2671 2672 2673 2674 2675 2676 2677 2678 2679 2680 2681 2682 2683 2684 2685 2686 2687 2688 2689 2690 2691 2692 2693 2694 2695 2696 2697 2698 2699 2700 2701 2702 2703 2704 2705 2706 2707 2708 2709 2710 2711 2712 2713 2714 2715 2716 2717 2718 2719 2720 2721 2722 2723 2724 2725 2726 2727 2728 2729 2730 2731 2732 2733 2734 2735 2736 2737 2738 2739 2740 2741 2742 2743 2744 2745 2746 2747 2748 2749 2750 2751 2752 2753 2754 2755 2756 2757 2758 2759 2760 2761 2762 2763 2764 2765 2766 2767 2768 2769 2770 2771 2772 2773 2774 2775 2776 2777 2778 2779 2780 2781 2782 2783 2784 2785 2786 2787 2788 2789 2790 2791 2792 2793 2794 2795 2796 2797 2798 2799 2800 2801 2802 2803 2804 2805 2806 2807 2808 2809 2810 2811 2812 2813 2814 2815 2816 2817 2818 2819 2820 2821 2822 2823 2824 2825 2826 2827 2828 2829 2830 2831 2832 2833 2834 2835 2836 2837 2838 2839 2840 2841 2842 2843 2844 2845 2846 2847 2848 2849 2850 2851 2852 2853 2854 2855 2856 2857 2858 2859 2860 2861 2862 2863 2864 2865 2866 2867 2868 2869 2870 2871 2872 2873 2874 2875 2876 2877 2878 2879 2880 2881 2882 2883 2884 2885 2886 2887 2888 2889 2890 2891 2892 2893 2894 2895 2896 2897 2898 2899 2900 2901 2902 2903 2904 2905 2906 2907 2908 2909 2910 2911 2912 2913 2914 2915 2916 2917 2918 2919 2920 2921 2922 2923 2924 2925 2926 2927 2928 2929 2930 2931 2932 2933 2934 2935 2936 2937 2938 2939 2940 2941 2942 2943 2944 2945 2946 2947 2948 2949 2950 2951 2952 2953 2954 2955 2956 2957 2958 2959 2960 2961 2962 2963 2964 2965 2966 2967 2968 2969 2970 2971 2972 2973 2974 2975 2976 2977 2978 2979 2980 2981 2982 2983 2984 2985 2986 2987 2988 2989 2990 2991 2992 2993 2994 2995 2996 2997 2998 2999 3000 3001 3002 3003 3004 3005 3006 3007 3008 3009 3010 3011 3012 3013 3014 3015 3016 3017 3018 3019 3020 3021 3022 3023 3024 3025 3026 3027 3028 3029 3030 3031 3032 3033 3034 3035 3036 3037 3038 3039 3040 3041 3042 3043 3044 3045 3046 3047 3048 3049 3050 3051 3052 3053 3054 3055 3056 3057 3058 3059 3060 3061 3062 3063 3064 3065 3066 3067 3068 3069 3070 3071 3072 3073 3074 3075 3076 3077 3078 3079 3080 3081 3082 3083 3084 3085 3086 3087 3088 3089 3090 3091 3092 3093 3094 3095 3096 3097 3098 3099 3100 3101 3102 3103 3104 3105 3106 3107 3108 3109 3110 3111 3112 3113 3114 3115 3116 3117 3118 3119 3120 3121 3122 3123 3124 3125 3126 3127 3128 3129 3130 3131 3132 3133 3134 3135 3136 3137 3138 3139 3140 3141 3142 3143 3144 3145 3146 3147 3148 3149 3150 3151 3152 3153 3154 3155 3156 3157 3158 3159 3160 3161 3162 3163 3164 3165 3166 3167 3168 3169 3170 3171 3172 3173 3174 3175 3176 3177 3178 3179 3180 3181 3182 3183 3184 3185 3186 3187 3188 3189 3190 3191 3192 3193 3194 3195 3196 3197 3198 3199 3200 3201 3202 3203 3204 3205 3206 3207 3208 3209 3210 3211 3212 3213 3214 3215 3216 3217 3218 3219 3220 3221 3222 3223 3224 3225 3226 3227 3228 3229 3230 3231 3232 3233 3234 3235 3236 3237 3238 3239 3240 3241 3242 3243 3244 3245 3246 3247 3248 3249 3250 3251 3252 3253 3254 3255 3256 3257 3258 3259 3260 3261 3262 3263 3264 3265 3266 3267 3268 3269 3270 3271 3272 3273 3274 3275 3276 3277 3278 3279 3280 3281 3282 3283 3284 3285 3286 3287 3288 3289 3290 3291 3292 3293 3294 3295 3296 3297 3298 3299 3300 3301 3302 3303 3304 3305 3306 3307 3308 3309 3310 3311 3312 3313 3314 3315 3316 3317 3318 3319 3320 3321 3322 3323 3324 3325 3326 3327 3328 3329 3330 3331 3332 3333 3334 3335 3336 3337 3338 3339 3340 3341 3342 3343 3344 3345 3346 3347 3348 3349 3350 3351 3352 3353 3354 3355 3356 3357 3358 3359 3360 3361 3362 3363 3364 3365 3366 3367 3368 3369 3370 3371 3372 3373 3374 3375 3376 3377 3378 3379 3380 3381 3382 3383 3384 3385 3386 3387 3388 3389 3390 3391 3392 3393 3394 3395 3396 3397 3398 3399 3400 3401 3402 3403 3404 3405 3406 3407 3408 3409 3410 3411 3412 3413 3414 3415 3416 3417 3418 3419 3420 3421 3422 3423 3424 3425 3426 3427 3428 3429 3430 3431 3432 3433 3434 3435 3436 3437 3438 3439 3440 3441 3442 3443 3444 3445 3446 3447 3448 3449 3450 3451 3452 3453 3454 3455 3456 3457 3458 3459 3460 3461 3462 3463 3464 3465 3466 3467 3468 3469 3470 3471 3472 3473 3474 3475 3476 3477 3478 3479 3480 3481 3482 3483 3484 3485 3486 3487 3488 3489 3490 3491 3492 3493 3494 3495 3496 3497 3498 3499 3500 3501 3502 3503 3504 3505 3506 3507 3508 3509 3510 3511 3512 3513 3514 3515 3516 3517 3518 3519 3520 3521 3522 3523 3524 3525 3526 3527 3528 3529 3530 3531 3532 3533 3534 3535 3536 3537 3538 3539 3540 3541 3542 3543 3544 3545 3546 3547 3548 3549 3550 3551 3552 3553 3554 3555 3556 3557 3558 3559 3560 3561 3562 3563 3564 3565 3566 3567 3568 3569 3570 3571 3572 3573 3574 3575 3576 3577 3578 3579 3580 3581 3582 3583 3584 3585 3586 3587 3588 3589 3590 3591 3592 3593 3594 3595 3596 3597 3598 3599 3600 3601 3602 3603 3604 3605 3606 3607 3608 3609 3610 3611 3612 3613 3614 3615 3616 3617 3618 3619 3620 3621 3622 3623 3624 3625 3626 3627 3628 3629 3630 3631 3632 3633 3634 3635 3636 3637 3638 3639 3640 3641 3642 3643 3644 3645 3646 3647 3648 3649 3650 3651 3652 3653 3654 3655 3656 3657 3658 3659 3660 3661 3662 3663 3664 3665 3666 3667 3668 3669 3670 3671 3672 3673 3674 3675 3676 3677 3678 3679 3680 3681 3682 3683 3684 3685 3686 3687 3688 3689 3690 3691 3692 3693 3694 3695 3696 3697 3698 3699 3700 3701 3702 3703 3704 3705 3706 3707 3708 3709 3710 3711 3712 3713 3714 3715 3716 3717 3718 3719 3720 3721 3722 3723 3724 3725 3726 3727 3728 3729 3730 3731 3732 3733 3734 3735 3736 3737 3738 3739 3740 3741 3742 3743 3744 3745 3746 3747 3748 3749 3750 3751 3752 3753 3754 // SPDX-License-Identifier: GPL-2.0 /* * event tracer * * Copyright (C) 2008 Red Hat Inc, Steven Rostedt <srostedt@redhat.com> * * - Added format output of fields of the trace point. * This was based off of work by Tom Zanussi <tzanussi@gmail.com>. * */ #define pr_fmt(fmt) fmt #include <linux/workqueue.h> #include <linux/security.h> #include <linux/spinlock.h> #include <linux/kthread.h> #include <linux/tracefs.h> #include <linux/uaccess.h> #include <linux/module.h> #include <linux/ctype.h> #include <linux/sort.h> #include <linux/slab.h> #include <linux/delay.h> #include <trace/events/sched.h> #include <trace/syscall.h> #include <asm/setup.h> #include "trace_output.h" #undef TRACE_SYSTEM #define TRACE_SYSTEM "TRACE_SYSTEM" DEFINE_MUTEX(event_mutex); LIST_HEAD(ftrace_events); static LIST_HEAD(ftrace_generic_fields); static LIST_HEAD(ftrace_common_fields); static bool eventdir_initialized; #define GFP_TRACE (GFP_KERNEL | __GFP_ZERO) static struct kmem_cache *field_cachep; static struct kmem_cache *file_cachep; static inline int system_refcount(struct event_subsystem *system) { return system->ref_count; } static int system_refcount_inc(struct event_subsystem *system) { return system->ref_count++; } static int system_refcount_dec(struct event_subsystem *system) { return --system->ref_count; } /* Double loops, do not use break, only goto's work */ #define do_for_each_event_file(tr, file) \ list_for_each_entry(tr, &ftrace_trace_arrays, list) { \ list_for_each_entry(file, &tr->events, list) #define do_for_each_event_file_safe(tr, file) \ list_for_each_entry(tr, &ftrace_trace_arrays, list) { \ struct trace_event_file *___n; \ list_for_each_entry_safe(file, ___n, &tr->events, list) #define while_for_each_event_file() \ } static struct ftrace_event_field * __find_event_field(struct list_head *head, char *name) { struct ftrace_event_field *field; list_for_each_entry(field, head, link) { if (!strcmp(field->name, name)) return field; } return NULL; } struct ftrace_event_field * trace_find_event_field(struct trace_event_call *call, char *name) { struct ftrace_event_field *field; struct list_head *head; head = trace_get_fields(call); field = __find_event_field(head, name); if (field) return field; field = __find_event_field(&ftrace_generic_fields, name); if (field) return field; return __find_event_field(&ftrace_common_fields, name); } static int __trace_define_field(struct list_head *head, const char *type, const char *name, int offset, int size, int is_signed, int filter_type) { struct ftrace_event_field *field; field = kmem_cache_alloc(field_cachep, GFP_TRACE); if (!field) return -ENOMEM; field->name = name; field->type = type; if (filter_type == FILTER_OTHER) field->filter_type = filter_assign_type(type); else field->filter_type = filter_type; field->offset = offset; field->size = size; field->is_signed = is_signed; list_add(&field->link, head); return 0; } int trace_define_field(struct trace_event_call *call, const char *type, const char *name, int offset, int size, int is_signed, int filter_type) { struct list_head *head; if (WARN_ON(!call->class)) return 0; head = trace_get_fields(call); return __trace_define_field(head, type, name, offset, size, is_signed, filter_type); } EXPORT_SYMBOL_GPL(trace_define_field); #define __generic_field(type, item, filter_type) \ ret = __trace_define_field(&ftrace_generic_fields, #type, \ #item, 0, 0, is_signed_type(type), \ filter_type); \ if (ret) \ return ret; #define __common_field(type, item) \ ret = __trace_define_field(&ftrace_common_fields, #type, \ "common_" #item, \ offsetof(typeof(ent), item), \ sizeof(ent.item), \ is_signed_type(type), FILTER_OTHER); \ if (ret) \ return ret; static int trace_define_generic_fields(void) { int ret; __generic_field(int, CPU, FILTER_CPU); __generic_field(int, cpu, FILTER_CPU); __generic_field(char *, COMM, FILTER_COMM); __generic_field(char *, comm, FILTER_COMM); return ret; } static int trace_define_common_fields(void) { int ret; struct trace_entry ent; __common_field(unsigned short, type); __common_field(unsigned char, flags); __common_field(unsigned char, preempt_count); __common_field(int, pid); return ret; } static void trace_destroy_fields(struct trace_event_call *call) { struct ftrace_event_field *field, *next; struct list_head *head; head = trace_get_fields(call); list_for_each_entry_safe(field, next, head, link) { list_del(&field->link); kmem_cache_free(field_cachep, field); } } /* * run-time version of trace_event_get_offsets_<call>() that returns the last * accessible offset of trace fields excluding __dynamic_array bytes */ int trace_event_get_offsets(struct trace_event_call *call) { struct ftrace_event_field *tail; struct list_head *head; head = trace_get_fields(call); /* * head->next points to the last field with the largest offset, * since it was added last by trace_define_field() */ tail = list_first_entry(head, struct ftrace_event_field, link); return tail->offset + tail->size; } int trace_event_raw_init(struct trace_event_call *call) { int id; id = register_trace_event(&call->event); if (!id) return -ENODEV; return 0; } EXPORT_SYMBOL_GPL(trace_event_raw_init); bool trace_event_ignore_this_pid(struct trace_event_file *trace_file) { struct trace_array *tr = trace_file->tr; struct trace_array_cpu *data; struct trace_pid_list *no_pid_list; struct trace_pid_list *pid_list; pid_list = rcu_dereference_raw(tr->filtered_pids); no_pid_list = rcu_dereference_raw(tr->filtered_no_pids); if (!pid_list && !no_pid_list) return false; data = this_cpu_ptr(tr->array_buffer.data); return data->ignore_pid; } EXPORT_SYMBOL_GPL(trace_event_ignore_this_pid); void *trace_event_buffer_reserve(struct trace_event_buffer *fbuffer, struct trace_event_file *trace_file, unsigned long len) { struct trace_event_call *event_call = trace_file->event_call; if ((trace_file->flags & EVENT_FILE_FL_PID_FILTER) && trace_event_ignore_this_pid(trace_file)) return NULL; local_save_flags(fbuffer->flags); fbuffer->pc = preempt_count(); /* * If CONFIG_PREEMPTION is enabled, then the tracepoint itself disables * preemption (adding one to the preempt_count). Since we are * interested in the preempt_count at the time the tracepoint was * hit, we need to subtract one to offset the increment. */ if (IS_ENABLED(CONFIG_PREEMPTION)) fbuffer->pc--; fbuffer->trace_file = trace_file; fbuffer->event = trace_event_buffer_lock_reserve(&fbuffer->buffer, trace_file, event_call->event.type, len, fbuffer->flags, fbuffer->pc); if (!fbuffer->event) return NULL; fbuffer->regs = NULL; fbuffer->entry = ring_buffer_event_data(fbuffer->event); return fbuffer->entry; } EXPORT_SYMBOL_GPL(trace_event_buffer_reserve); int trace_event_reg(struct trace_event_call *call, enum trace_reg type, void *data) { struct trace_event_file *file = data; WARN_ON(!(call->flags & TRACE_EVENT_FL_TRACEPOINT)); switch (type) { case TRACE_REG_REGISTER: return tracepoint_probe_register(call->tp, call->class->probe, file); case TRACE_REG_UNREGISTER: tracepoint_probe_unregister(call->tp, call->class->probe, file); return 0; #ifdef CONFIG_PERF_EVENTS case TRACE_REG_PERF_REGISTER: return tracepoint_probe_register(call->tp, call->class->perf_probe, call); case TRACE_REG_PERF_UNREGISTER: tracepoint_probe_unregister(call->tp, call->class->perf_probe, call); return 0; case TRACE_REG_PERF_OPEN: case TRACE_REG_PERF_CLOSE: case TRACE_REG_PERF_ADD: case TRACE_REG_PERF_DEL: return 0; #endif } return 0; } EXPORT_SYMBOL_GPL(trace_event_reg); void trace_event_enable_cmd_record(bool enable) { struct trace_event_file *file; struct trace_array *tr; lockdep_assert_held(&event_mutex); do_for_each_event_file(tr, file) { if (!(file->flags & EVENT_FILE_FL_ENABLED)) continue; if (enable) { tracing_start_cmdline_record(); set_bit(EVENT_FILE_FL_RECORDED_CMD_BIT, &file->flags); } else { tracing_stop_cmdline_record(); clear_bit(EVENT_FILE_FL_RECORDED_CMD_BIT, &file->flags); } } while_for_each_event_file(); } void trace_event_enable_tgid_record(bool enable) { struct trace_event_file *file; struct trace_array *tr; lockdep_assert_held(&event_mutex); do_for_each_event_file(tr, file) { if (!(file->flags & EVENT_FILE_FL_ENABLED)) continue; if (enable) { tracing_start_tgid_record(); set_bit(EVENT_FILE_FL_RECORDED_TGID_BIT, &file->flags); } else { tracing_stop_tgid_record(); clear_bit(EVENT_FILE_FL_RECORDED_TGID_BIT, &file->flags); } } while_for_each_event_file(); } static int __ftrace_event_enable_disable(struct trace_event_file *file, int enable, int soft_disable) { struct trace_event_call *call = file->event_call; struct trace_array *tr = file->tr; unsigned long file_flags = file->flags; int ret = 0; int disable; switch (enable) { case 0: /* * When soft_disable is set and enable is cleared, the sm_ref * reference counter is decremented. If it reaches 0, we want * to clear the SOFT_DISABLED flag but leave the event in the * state that it was. That is, if the event was enabled and * SOFT_DISABLED isn't set, then do nothing. But if SOFT_DISABLED * is set we do not want the event to be enabled before we * clear the bit. * * When soft_disable is not set but the SOFT_MODE flag is, * we do nothing. Do not disable the tracepoint, otherwise * "soft enable"s (clearing the SOFT_DISABLED bit) wont work. */ if (soft_disable) { if (atomic_dec_return(&file->sm_ref) > 0) break; disable = file->flags & EVENT_FILE_FL_SOFT_DISABLED; clear_bit(EVENT_FILE_FL_SOFT_MODE_BIT, &file->flags); } else disable = !(file->flags & EVENT_FILE_FL_SOFT_MODE); if (disable && (file->flags & EVENT_FILE_FL_ENABLED)) { clear_bit(EVENT_FILE_FL_ENABLED_BIT, &file->flags); if (file->flags & EVENT_FILE_FL_RECORDED_CMD) { tracing_stop_cmdline_record(); clear_bit(EVENT_FILE_FL_RECORDED_CMD_BIT, &file->flags); } if (file->flags & EVENT_FILE_FL_RECORDED_TGID) { tracing_stop_tgid_record(); clear_bit(EVENT_FILE_FL_RECORDED_TGID_BIT, &file->flags); } call->class->reg(call, TRACE_REG_UNREGISTER, file); } /* If in SOFT_MODE, just set the SOFT_DISABLE_BIT, else clear it */ if (file->flags & EVENT_FILE_FL_SOFT_MODE) set_bit(EVENT_FILE_FL_SOFT_DISABLED_BIT, &file->flags); else clear_bit(EVENT_FILE_FL_SOFT_DISABLED_BIT, &file->flags); break; case 1: /* * When soft_disable is set and enable is set, we want to * register the tracepoint for the event, but leave the event * as is. That means, if the event was already enabled, we do * nothing (but set SOFT_MODE). If the event is disabled, we * set SOFT_DISABLED before enabling the event tracepoint, so * it still seems to be disabled. */ if (!soft_disable) clear_bit(EVENT_FILE_FL_SOFT_DISABLED_BIT, &file->flags); else { if (atomic_inc_return(&file->sm_ref) > 1) break; set_bit(EVENT_FILE_FL_SOFT_MODE_BIT, &file->flags); } if (!(file->flags & EVENT_FILE_FL_ENABLED)) { bool cmd = false, tgid = false; /* Keep the event disabled, when going to SOFT_MODE. */ if (soft_disable) set_bit(EVENT_FILE_FL_SOFT_DISABLED_BIT, &file->flags); if (tr->trace_flags & TRACE_ITER_RECORD_CMD) { cmd = true; tracing_start_cmdline_record(); set_bit(EVENT_FILE_FL_RECORDED_CMD_BIT, &file->flags); } if (tr->trace_flags & TRACE_ITER_RECORD_TGID) { tgid = true; tracing_start_tgid_record(); set_bit(EVENT_FILE_FL_RECORDED_TGID_BIT, &file->flags); } ret = call->class->reg(call, TRACE_REG_REGISTER, file); if (ret) { if (cmd) tracing_stop_cmdline_record(); if (tgid) tracing_stop_tgid_record(); pr_info("event trace: Could not enable event " "%s\n", trace_event_name(call)); break; } set_bit(EVENT_FILE_FL_ENABLED_BIT, &file->flags); /* WAS_ENABLED gets set but never cleared. */ set_bit(EVENT_FILE_FL_WAS_ENABLED_BIT, &file->flags); } break; } /* Enable or disable use of trace_buffered_event */ if ((file_flags & EVENT_FILE_FL_SOFT_DISABLED) != (file->flags & EVENT_FILE_FL_SOFT_DISABLED)) { if (file->flags & EVENT_FILE_FL_SOFT_DISABLED) trace_buffered_event_enable(); else trace_buffered_event_disable(); } return ret; } int trace_event_enable_disable(struct trace_event_file *file, int enable, int soft_disable) { return __ftrace_event_enable_disable(file, enable, soft_disable); } static int ftrace_event_enable_disable(struct trace_event_file *file, int enable) { return __ftrace_event_enable_disable(file, enable, 0); } static void ftrace_clear_events(struct trace_array *tr) { struct trace_event_file *file; mutex_lock(&event_mutex); list_for_each_entry(file, &tr->events, list) { ftrace_event_enable_disable(file, 0); } mutex_unlock(&event_mutex); } static void event_filter_pid_sched_process_exit(void *data, struct task_struct *task) { struct trace_pid_list *pid_list; struct trace_array *tr = data; pid_list = rcu_dereference_raw(tr->filtered_pids); trace_filter_add_remove_task(pid_list, NULL, task); pid_list = rcu_dereference_raw(tr->filtered_no_pids); trace_filter_add_remove_task(pid_list, NULL, task); } static void event_filter_pid_sched_process_fork(void *data, struct task_struct *self, struct task_struct *task) { struct trace_pid_list *pid_list; struct trace_array *tr = data; pid_list = rcu_dereference_sched(tr->filtered_pids); trace_filter_add_remove_task(pid_list, self, task); pid_list = rcu_dereference_sched(tr->filtered_no_pids); trace_filter_add_remove_task(pid_list, self, task); } void trace_event_follow_fork(struct trace_array *tr, bool enable) { if (enable) { register_trace_prio_sched_process_fork(event_filter_pid_sched_process_fork, tr, INT_MIN); register_trace_prio_sched_process_free(event_filter_pid_sched_process_exit, tr, INT_MAX); } else { unregister_trace_sched_process_fork(event_filter_pid_sched_process_fork, tr); unregister_trace_sched_process_free(event_filter_pid_sched_process_exit, tr); } } static void event_filter_pid_sched_switch_probe_pre(void *data, bool preempt, struct task_struct *prev, struct task_struct *next) { struct trace_array *tr = data; struct trace_pid_list *no_pid_list; struct trace_pid_list *pid_list; bool ret; pid_list = rcu_dereference_sched(tr->filtered_pids); no_pid_list = rcu_dereference_sched(tr->filtered_no_pids); /* * Sched switch is funny, as we only want to ignore it * in the notrace case if both prev and next should be ignored. */ ret = trace_ignore_this_task(NULL, no_pid_list, prev) && trace_ignore_this_task(NULL, no_pid_list, next); this_cpu_write(tr->array_buffer.data->ignore_pid, ret || (trace_ignore_this_task(pid_list, NULL, prev) && trace_ignore_this_task(pid_list, NULL, next))); } static void event_filter_pid_sched_switch_probe_post(void *data, bool preempt, struct task_struct *prev, struct task_struct *next) { struct trace_array *tr = data; struct trace_pid_list *no_pid_list; struct trace_pid_list *pid_list; pid_list = rcu_dereference_sched(tr->filtered_pids); no_pid_list = rcu_dereference_sched(tr->filtered_no_pids); this_cpu_write(tr->array_buffer.data->ignore_pid, trace_ignore_this_task(pid_list, no_pid_list, next)); } static void event_filter_pid_sched_wakeup_probe_pre(void *data, struct task_struct *task) { struct trace_array *tr = data; struct trace_pid_list *no_pid_list; struct trace_pid_list *pid_list; /* Nothing to do if we are already tracing */ if (!this_cpu_read(tr->array_buffer.data->ignore_pid)) return; pid_list = rcu_dereference_sched(tr->filtered_pids); no_pid_list = rcu_dereference_sched(tr->filtered_no_pids); this_cpu_write(tr->array_buffer.data->ignore_pid, trace_ignore_this_task(pid_list, no_pid_list, task)); } static void event_filter_pid_sched_wakeup_probe_post(void *data, struct task_struct *task) { struct trace_array *tr = data; struct trace_pid_list *no_pid_list; struct trace_pid_list *pid_list; /* Nothing to do if we are not tracing */ if (this_cpu_read(tr->array_buffer.data->ignore_pid)) return; pid_list = rcu_dereference_sched(tr->filtered_pids); no_pid_list = rcu_dereference_sched(tr->filtered_no_pids); /* Set tracing if current is enabled */ this_cpu_write(tr->array_buffer.data->ignore_pid, trace_ignore_this_task(pid_list, no_pid_list, current)); } static void unregister_pid_events(struct trace_array *tr) { unregister_trace_sched_switch(event_filter_pid_sched_switch_probe_pre, tr); unregister_trace_sched_switch(event_filter_pid_sched_switch_probe_post, tr); unregister_trace_sched_wakeup(event_filter_pid_sched_wakeup_probe_pre, tr); unregister_trace_sched_wakeup(event_filter_pid_sched_wakeup_probe_post, tr); unregister_trace_sched_wakeup_new(event_filter_pid_sched_wakeup_probe_pre, tr); unregister_trace_sched_wakeup_new(event_filter_pid_sched_wakeup_probe_post, tr); unregister_trace_sched_waking(event_filter_pid_sched_wakeup_probe_pre, tr); unregister_trace_sched_waking(event_filter_pid_sched_wakeup_probe_post, tr); } static void __ftrace_clear_event_pids(struct trace_array *tr, int type) { struct trace_pid_list *pid_list; struct trace_pid_list *no_pid_list; struct trace_event_file *file; int cpu; pid_list = rcu_dereference_protected(tr->filtered_pids, lockdep_is_held(&event_mutex)); no_pid_list = rcu_dereference_protected(tr->filtered_no_pids, lockdep_is_held(&event_mutex)); /* Make sure there's something to do */ if (!pid_type_enabled(type, pid_list, no_pid_list)) return; if (!still_need_pid_events(type, pid_list, no_pid_list)) { unregister_pid_events(tr); list_for_each_entry(file, &tr->events, list) { clear_bit(EVENT_FILE_FL_PID_FILTER_BIT, &file->flags); } for_each_possible_cpu(cpu) per_cpu_ptr(tr->array_buffer.data, cpu)->ignore_pid = false; } if (type & TRACE_PIDS) rcu_assign_pointer(tr->filtered_pids, NULL); if (type & TRACE_NO_PIDS) rcu_assign_pointer(tr->filtered_no_pids, NULL); /* Wait till all users are no longer using pid filtering */ tracepoint_synchronize_unregister(); if ((type & TRACE_PIDS) && pid_list) trace_free_pid_list(pid_list); if ((type & TRACE_NO_PIDS) && no_pid_list) trace_free_pid_list(no_pid_list); } static void ftrace_clear_event_pids(struct trace_array *tr, int type) { mutex_lock(&event_mutex); __ftrace_clear_event_pids(tr, type); mutex_unlock(&event_mutex); } static void __put_system(struct event_subsystem *system) { struct event_filter *filter = system->filter; WARN_ON_ONCE(system_refcount(system) == 0); if (system_refcount_dec(system)) return; list_del(&system->list); if (filter) { kfree(filter->filter_string); kfree(filter); } kfree_const(system->name); kfree(system); } static void __get_system(struct event_subsystem *system) { WARN_ON_ONCE(system_refcount(system) == 0); system_refcount_inc(system); } static void __get_system_dir(struct trace_subsystem_dir *dir) { WARN_ON_ONCE(dir->ref_count == 0); dir->ref_count++; __get_system(dir->subsystem); } static void __put_system_dir(struct trace_subsystem_dir *dir) { WARN_ON_ONCE(dir->ref_count == 0); /* If the subsystem is about to be freed, the dir must be too */ WARN_ON_ONCE(system_refcount(dir->subsystem) == 1 && dir->ref_count != 1); __put_system(dir->subsystem); if (!--dir->ref_count) kfree(dir); } static void put_system(struct trace_subsystem_dir *dir) { mutex_lock(&event_mutex); __put_system_dir(dir); mutex_unlock(&event_mutex); } static void remove_subsystem(struct trace_subsystem_dir *dir) { if (!dir) return; if (!--dir->nr_events) { tracefs_remove(dir->entry); list_del(&dir->list); __put_system_dir(dir); } } static void remove_event_file_dir(struct trace_event_file *file) { struct dentry *dir = file->dir; struct dentry *child; if (dir) { spin_lock(&dir->d_lock); /* probably unneeded */ list_for_each_entry(child, &dir->d_subdirs, d_child) { if (d_really_is_positive(child)) /* probably unneeded */ d_inode(child)->i_private = NULL; } spin_unlock(&dir->d_lock); tracefs_remove(dir); } list_del(&file->list); remove_subsystem(file->system); free_event_filter(file->filter); kmem_cache_free(file_cachep, file); } /* * __ftrace_set_clr_event(NULL, NULL, NULL, set) will set/unset all events. */ static int __ftrace_set_clr_event_nolock(struct trace_array *tr, const char *match, const char *sub, const char *event, int set) { struct trace_event_file *file; struct trace_event_call *call; const char *name; int ret = -EINVAL; int eret = 0; list_for_each_entry(file, &tr->events, list) { call = file->event_call; name = trace_event_name(call); if (!name || !call->class || !call->class->reg) continue; if (call->flags & TRACE_EVENT_FL_IGNORE_ENABLE) continue; if (match && strcmp(match, name) != 0 && strcmp(match, call->class->system) != 0) continue; if (sub && strcmp(sub, call->class->system) != 0) continue; if (event && strcmp(event, name) != 0) continue; ret = ftrace_event_enable_disable(file, set); /* * Save the first error and return that. Some events * may still have been enabled, but let the user * know that something went wrong. */ if (ret && !eret) eret = ret; ret = eret; } return ret; } static int __ftrace_set_clr_event(struct trace_array *tr, const char *match, const char *sub, const char *event, int set) { int ret; mutex_lock(&event_mutex); ret = __ftrace_set_clr_event_nolock(tr, match, sub, event, set); mutex_unlock(&event_mutex); return ret; } int ftrace_set_clr_event(struct trace_array *tr, char *buf, int set) { char *event = NULL, *sub = NULL, *match; int ret; if (!tr) return -ENOENT; /* * The buf format can be <subsystem>:<event-name> * *:<event-name> means any event by that name. * :<event-name> is the same. * * <subsystem>:* means all events in that subsystem * <subsystem>: means the same. * * <name> (no ':') means all events in a subsystem with * the name <name> or any event that matches <name> */ match = strsep(&buf, ":"); if (buf) { sub = match; event = buf; match = NULL; if (!strlen(sub) || strcmp(sub, "*") == 0) sub = NULL; if (!strlen(event) || strcmp(event, "*") == 0) event = NULL; } ret = __ftrace_set_clr_event(tr, match, sub, event, set); /* Put back the colon to allow this to be called again */ if (buf) *(buf - 1) = ':'; return ret; } /** * trace_set_clr_event - enable or disable an event * @system: system name to match (NULL for any system) * @event: event name to match (NULL for all events, within system) * @set: 1 to enable, 0 to disable * * This is a way for other parts of the kernel to enable or disable * event recording. * * Returns 0 on success, -EINVAL if the parameters do not match any * registered events. */ int trace_set_clr_event(const char *system, const char *event, int set) { struct trace_array *tr = top_trace_array(); if (!tr) return -ENODEV; return __ftrace_set_clr_event(tr, NULL, system, event, set); } EXPORT_SYMBOL_GPL(trace_set_clr_event); /** * trace_array_set_clr_event - enable or disable an event for a trace array. * @tr: concerned trace array. * @system: system name to match (NULL for any system) * @event: event name to match (NULL for all events, within system) * @enable: true to enable, false to disable * * This is a way for other parts of the kernel to enable or disable * event recording. * * Returns 0 on success, -EINVAL if the parameters do not match any * registered events. */ int trace_array_set_clr_event(struct trace_array *tr, const char *system, const char *event, bool enable) { int set; if (!tr) return -ENOENT; set = (enable == true) ? 1 : 0; return __ftrace_set_clr_event(tr, NULL, system, event, set); } EXPORT_SYMBOL_GPL(trace_array_set_clr_event); /* 128 should be much more than enough */ #define EVENT_BUF_SIZE 127 static ssize_t ftrace_event_write(struct file *file, const char __user *ubuf, size_t cnt, loff_t *ppos) { struct trace_parser parser; struct seq_file *m = file->private_data; struct trace_array *tr = m->private; ssize_t read, ret; if (!cnt) return 0; ret = tracing_update_buffers(); if (ret < 0) return ret; if (trace_parser_get_init(&parser, EVENT_BUF_SIZE + 1)) return -ENOMEM; read = trace_get_user(&parser, ubuf, cnt, ppos); if (read >= 0 && trace_parser_loaded((&parser))) { int set = 1; if (*parser.buffer == '!') set = 0; ret = ftrace_set_clr_event(tr, parser.buffer + !set, set); if (ret) goto out_put; } ret = read; out_put: trace_parser_put(&parser); return ret; } static void * t_next(struct seq_file *m, void *v, loff_t *pos) { struct trace_event_file *file = v; struct trace_event_call *call; struct trace_array *tr = m->private; (*pos)++; list_for_each_entry_continue(file, &tr->events, list) { call = file->event_call; /* * The ftrace subsystem is for showing formats only. * They can not be enabled or disabled via the event files. */ if (call->class && call->class->reg && !(call->flags & TRACE_EVENT_FL_IGNORE_ENABLE)) return file; } return NULL; } static void *t_start(struct seq_file *m, loff_t *pos) { struct trace_event_file *file; struct trace_array *tr = m->private; loff_t l; mutex_lock(&event_mutex); file = list_entry(&tr->events, struct trace_event_file, list); for (l = 0; l <= *pos; ) { file = t_next(m, file, &l); if (!file) break; } return file; } static void * s_next(struct seq_file *m, void *v, loff_t *pos) { struct trace_event_file *file = v; struct trace_array *tr = m->private; (*pos)++; list_for_each_entry_continue(file, &tr->events, list) { if (file->flags & EVENT_FILE_FL_ENABLED) return file; } return NULL; } static void *s_start(struct seq_file *m, loff_t *pos) { struct trace_event_file *file; struct trace_array *tr = m->private; loff_t l; mutex_lock(&event_mutex); file = list_entry(&tr->events, struct trace_event_file, list); for (l = 0; l <= *pos; ) { file = s_next(m, file, &l); if (!file) break; } return file; } static int t_show(struct seq_file *m, void *v) { struct trace_event_file *file = v; struct trace_event_call *call = file->event_call; if (strcmp(call->class->system, TRACE_SYSTEM) != 0) seq_printf(m, "%s:", call->class->system); seq_printf(m, "%s\n", trace_event_name(call)); return 0; } static void t_stop(struct seq_file *m, void *p) { mutex_unlock(&event_mutex); } static void * __next(struct seq_file *m, void *v, loff_t *pos, int type) { struct trace_array *tr = m->private; struct trace_pid_list *pid_list; if (type == TRACE_PIDS) pid_list = rcu_dereference_sched(tr->filtered_pids); else pid_list = rcu_dereference_sched(tr->filtered_no_pids); return trace_pid_next(pid_list, v, pos); } static void * p_next(struct seq_file *m, void *v, loff_t *pos) { return __next(m, v, pos, TRACE_PIDS); } static void * np_next(struct seq_file *m, void *v, loff_t *pos) { return __next(m, v, pos, TRACE_NO_PIDS); } static void *__start(struct seq_file *m, loff_t *pos, int type) __acquires(RCU) { struct trace_pid_list *pid_list; struct trace_array *tr = m->private; /* * Grab the mutex, to keep calls to p_next() having the same * tr->filtered_pids as p_start() has. * If we just passed the tr->filtered_pids around, then RCU would * have been enough, but doing that makes things more complex. */ mutex_lock(&event_mutex); rcu_read_lock_sched(); if (type == TRACE_PIDS) pid_list = rcu_dereference_sched(tr->filtered_pids); else pid_list = rcu_dereference_sched(tr->filtered_no_pids); if (!pid_list) return NULL; return trace_pid_start(pid_list, pos); } static void *p_start(struct seq_file *m, loff_t *pos) __acquires(RCU) { return __start(m, pos, TRACE_PIDS); } static void *np_start(struct seq_file *m, loff_t *pos) __acquires(RCU) { return __start(m, pos, TRACE_NO_PIDS); } static void p_stop(struct seq_file *m, void *p) __releases(RCU) { rcu_read_unlock_sched(); mutex_unlock(&event_mutex); } static ssize_t event_enable_read(struct file *filp, char __user *ubuf, size_t cnt, loff_t *ppos) { struct trace_event_file *file; unsigned long flags; char buf[4] = "0"; mutex_lock(&event_mutex); file = event_file_data(filp); if (likely(file)) flags = file->flags; mutex_unlock(&event_mutex); if (!file) return -ENODEV; if (flags & EVENT_FILE_FL_ENABLED && !(flags & EVENT_FILE_FL_SOFT_DISABLED)) strcpy(buf, "1"); if (flags & EVENT_FILE_FL_SOFT_DISABLED || flags & EVENT_FILE_FL_SOFT_MODE) strcat(buf, "*"); strcat(buf, "\n"); return simple_read_from_buffer(ubuf, cnt, ppos, buf, strlen(buf)); } static ssize_t event_enable_write(struct file *filp, const char __user *ubuf, size_t cnt, loff_t *ppos) { struct trace_event_file *file; unsigned long val; int ret; ret = kstrtoul_from_user(ubuf, cnt, 10, &val); if (ret) return ret; ret = tracing_update_buffers(); if (ret < 0) return ret; switch (val) { case 0: case 1: ret = -ENODEV; mutex_lock(&event_mutex); file = event_file_data(filp); if (likely(file)) ret = ftrace_event_enable_disable(file, val); mutex_unlock(&event_mutex); break; default: return -EINVAL; } *ppos += cnt; return ret ? ret : cnt; } static ssize_t system_enable_read(struct file *filp, char __user *ubuf, size_t cnt, loff_t *ppos) { const char set_to_char[4] = { '?', '0', '1', 'X' }; struct trace_subsystem_dir *dir = filp->private_data; struct event_subsystem *system = dir->subsystem; struct trace_event_call *call; struct trace_event_file *file; struct trace_array *tr = dir->tr; char buf[2]; int set = 0; int ret; mutex_lock(&event_mutex); list_for_each_entry(file, &tr->events, list) { call = file->event_call; if ((call->flags & TRACE_EVENT_FL_IGNORE_ENABLE) || !trace_event_name(call) || !call->class || !call->class->reg) continue; if (system && strcmp(call->class->system, system->name) != 0) continue; /* * We need to find out if all the events are set * or if all events or cleared, or if we have * a mixture. */ set |= (1 << !!(file->flags & EVENT_FILE_FL_ENABLED)); /* * If we have a mixture, no need to look further. */ if (set == 3) break; } mutex_unlock(&event_mutex); buf[0] = set_to_char[set]; buf[1] = '\n'; ret = simple_read_from_buffer(ubuf, cnt, ppos, buf, 2); return ret; } static ssize_t system_enable_write(struct file *filp, const char __user *ubuf, size_t cnt, loff_t *ppos) { struct trace_subsystem_dir *dir = filp->private_data; struct event_subsystem *system = dir->subsystem; const char *name = NULL; unsigned long val; ssize_t ret; ret = kstrtoul_from_user(ubuf, cnt, 10, &val); if (ret) return ret; ret = tracing_update_buffers(); if (ret < 0) return ret; if (val != 0 && val != 1) return -EINVAL; /* * Opening of "enable" adds a ref count to system, * so the name is safe to use. */ if (system) name = system->name; ret = __ftrace_set_clr_event(dir->tr, NULL, name, NULL, val); if (ret) goto out; ret = cnt; out: *ppos += cnt; return ret; } enum { FORMAT_HEADER = 1, FORMAT_FIELD_SEPERATOR = 2, FORMAT_PRINTFMT = 3, }; static void *f_next(struct seq_file *m, void *v, loff_t *pos) { struct trace_event_call *call = event_file_data(m->private); struct list_head *common_head = &ftrace_common_fields; struct list_head *head = trace_get_fields(call); struct list_head *node = v; (*pos)++; switch ((unsigned long)v) { case FORMAT_HEADER: node = common_head; break; case FORMAT_FIELD_SEPERATOR: node = head; break; case FORMAT_PRINTFMT: /* all done */ return NULL; } node = node->prev; if (node == common_head) return (void *)FORMAT_FIELD_SEPERATOR; else if (node == head) return (void *)FORMAT_PRINTFMT; else return node; } static int f_show(struct seq_file *m, void *v) { struct trace_event_call *call = event_file_data(m->private); struct ftrace_event_field *field; const char *array_descriptor; switch ((unsigned long)v) { case FORMAT_HEADER: seq_printf(m, "name: %s\n", trace_event_name(call)); seq_printf(m, "ID: %d\n", call->event.type); seq_puts(m, "format:\n"); return 0; case FORMAT_FIELD_SEPERATOR: seq_putc(m, '\n'); return 0; case FORMAT_PRINTFMT: seq_printf(m, "\nprint fmt: %s\n", call->print_fmt); return 0; } field = list_entry(v, struct ftrace_event_field, link); /* * Smartly shows the array type(except dynamic array). * Normal: * field:TYPE VAR * If TYPE := TYPE[LEN], it is shown: * field:TYPE VAR[LEN] */ array_descriptor = strchr(field->type, '['); if (str_has_prefix(field->type, "__data_loc")) array_descriptor = NULL; if (!array_descriptor) seq_printf(m, "\tfield:%s %s;\toffset:%u;\tsize:%u;\tsigned:%d;\n", field->type, field->name, field->offset, field->size, !!field->is_signed); else seq_printf(m, "\tfield:%.*s %s%s;\toffset:%u;\tsize:%u;\tsigned:%d;\n", (int)(array_descriptor - field->type), field->type, field->name, array_descriptor, field->offset, field->size, !!field->is_signed); return 0; } static void *f_start(struct seq_file *m, loff_t *pos) { void *p = (void *)FORMAT_HEADER; loff_t l = 0; /* ->stop() is called even if ->start() fails */ mutex_lock(&event_mutex); if (!event_file_data(m->private)) return ERR_PTR(-ENODEV); while (l < *pos && p) p = f_next(m, p, &l); return p; } static void f_stop(struct seq_file *m, void *p) { mutex_unlock(&event_mutex); } static const struct seq_operations trace_format_seq_ops = { .start = f_start, .next = f_next, .stop = f_stop, .show = f_show, }; static int trace_format_open(struct inode *inode, struct file *file) { struct seq_file *m; int ret; /* Do we want to hide event format files on tracefs lockdown? */ ret = seq_open(file, &trace_format_seq_ops); if (ret < 0) return ret; m = file->private_data; m->private = file; return 0; } static ssize_t event_id_read(struct file *filp, char __user *ubuf, size_t cnt, loff_t *ppos) { int id = (long)event_file_data(filp); char buf[32]; int len; if (unlikely(!id)) return -ENODEV; len = sprintf(buf, "%d\n", id); return simple_read_from_buffer(ubuf, cnt, ppos, buf, len); } static ssize_t event_filter_read(struct file *filp, char __user *ubuf, size_t cnt, loff_t *ppos) { struct trace_event_file *file; struct trace_seq *s; int r = -ENODEV; if (*ppos) return 0; s = kmalloc(sizeof(*s), GFP_KERNEL); if (!s) return -ENOMEM; trace_seq_init(s); mutex_lock(&event_mutex); file = event_file_data(filp); if (file) print_event_filter(file, s); mutex_unlock(&event_mutex); if (file) r = simple_read_from_buffer(ubuf, cnt, ppos, s->buffer, trace_seq_used(s)); kfree(s); return r; } static ssize_t event_filter_write(struct file *filp, const char __user *ubuf, size_t cnt, loff_t *ppos) { struct trace_event_file *file; char *buf; int err = -ENODEV; if (cnt >= PAGE_SIZE) return -EINVAL; buf = memdup_user_nul(ubuf, cnt); if (IS_ERR(buf)) return PTR_ERR(buf); mutex_lock(&event_mutex); file = event_file_data(filp); if (file) err = apply_event_filter(file, buf); mutex_unlock(&event_mutex); kfree(buf); if (err < 0) return err; *ppos += cnt; return cnt; } static LIST_HEAD(event_subsystems); static int subsystem_open(struct inode *inode, struct file *filp) { struct event_subsystem *system = NULL; struct trace_subsystem_dir *dir = NULL; /* Initialize for gcc */ struct trace_array *tr; int ret; if (tracing_is_disabled()) return -ENODEV; /* Make sure the system still exists */ mutex_lock(&event_mutex); mutex_lock(&trace_types_lock); list_for_each_entry(tr, &ftrace_trace_arrays, list) { list_for_each_entry(dir, &tr->systems, list) { if (dir == inode->i_private) { /* Don't open systems with no events */ if (dir->nr_events) { __get_system_dir(dir); system = dir->subsystem; } goto exit_loop; } } } exit_loop: mutex_unlock(&trace_types_lock); mutex_unlock(&event_mutex); if (!system) return -ENODEV; /* Some versions of gcc think dir can be uninitialized here */ WARN_ON(!dir); /* Still need to increment the ref count of the system */ if (trace_array_get(tr) < 0) { put_system(dir); return -ENODEV; } ret = tracing_open_generic(inode, filp); if (ret < 0) { trace_array_put(tr); put_system(dir); } return ret; } static int system_tr_open(struct inode *inode, struct file *filp) { struct trace_subsystem_dir *dir; struct trace_array *tr = inode->i_private; int ret; /* Make a temporary dir that has no system but points to tr */ dir = kzalloc(sizeof(*dir), GFP_KERNEL); if (!dir) return -ENOMEM; ret = tracing_open_generic_tr(inode, filp); if (ret < 0) { kfree(dir); return ret; } dir->tr = tr; filp->private_data = dir; return 0; } static int subsystem_release(struct inode *inode, struct file *file) { struct trace_subsystem_dir *dir = file->private_data; trace_array_put(dir->tr); /* * If dir->subsystem is NULL, then this is a temporary * descriptor that was made for a trace_array to enable * all subsystems. */ if (dir->subsystem) put_system(dir); else kfree(dir); return 0; } static ssize_t subsystem_filter_read(struct file *filp, char __user *ubuf, size_t cnt, loff_t *ppos) { struct trace_subsystem_dir *dir = filp->private_data; struct event_subsystem *system = dir->subsystem; struct trace_seq *s; int r; if (*ppos) return 0; s = kmalloc(sizeof(*s), GFP_KERNEL); if (!s) return -ENOMEM; trace_seq_init(s); print_subsystem_event_filter(system, s); r = simple_read_from_buffer(ubuf, cnt, ppos, s->buffer, trace_seq_used(s)); kfree(s); return r; } static ssize_t subsystem_filter_write(struct file *filp, const char __user *ubuf, size_t cnt, loff_t *ppos) { struct trace_subsystem_dir *dir = filp->private_data; char *buf; int err; if (cnt >= PAGE_SIZE) return -EINVAL; buf = memdup_user_nul(ubuf, cnt); if (IS_ERR(buf)) return PTR_ERR(buf); err = apply_subsystem_event_filter(dir, buf); kfree(buf); if (err < 0) return err; *ppos += cnt; return cnt; } static ssize_t show_header(struct file *filp, char __user *ubuf, size_t cnt, loff_t *ppos) { int (*func)(struct trace_seq *s) = filp->private_data; struct trace_seq *s; int r; if (*ppos) return 0; s = kmalloc(sizeof(*s), GFP_KERNEL); if (!s) return -ENOMEM; trace_seq_init(s); func(s); r = simple_read_from_buffer(ubuf, cnt, ppos, s->buffer, trace_seq_used(s)); kfree(s); return r; } static void ignore_task_cpu(void *data) { struct trace_array *tr = data; struct trace_pid_list *pid_list; struct trace_pid_list *no_pid_list; /* * This function is called by on_each_cpu() while the * event_mutex is held. */ pid_list = rcu_dereference_protected(tr->filtered_pids, mutex_is_locked(&event_mutex)); no_pid_list = rcu_dereference_protected(tr->filtered_no_pids, mutex_is_locked(&event_mutex)); this_cpu_write(tr->array_buffer.data->ignore_pid, trace_ignore_this_task(pid_list, no_pid_list, current)); } static void register_pid_events(struct trace_array *tr) { /* * Register a probe that is called before all other probes * to set ignore_pid if next or prev do not match. * Register a probe this is called after all other probes * to only keep ignore_pid set if next pid matches. */ register_trace_prio_sched_switch(event_filter_pid_sched_switch_probe_pre, tr, INT_MAX); register_trace_prio_sched_switch(event_filter_pid_sched_switch_probe_post, tr, 0); register_trace_prio_sched_wakeup(event_filter_pid_sched_wakeup_probe_pre, tr, INT_MAX); register_trace_prio_sched_wakeup(event_filter_pid_sched_wakeup_probe_post, tr, 0); register_trace_prio_sched_wakeup_new(event_filter_pid_sched_wakeup_probe_pre, tr, INT_MAX); register_trace_prio_sched_wakeup_new(event_filter_pid_sched_wakeup_probe_post, tr, 0); register_trace_prio_sched_waking(event_filter_pid_sched_wakeup_probe_pre, tr, INT_MAX); register_trace_prio_sched_waking(event_filter_pid_sched_wakeup_probe_post, tr, 0); } static ssize_t event_pid_write(struct file *filp, const char __user *ubuf, size_t cnt, loff_t *ppos, int type) { struct seq_file *m = filp->private_data; struct trace_array *tr = m->private; struct trace_pid_list *filtered_pids = NULL; struct trace_pid_list *other_pids = NULL; struct trace_pid_list *pid_list; struct trace_event_file *file; ssize_t ret; if (!cnt) return 0; ret = tracing_update_buffers(); if (ret < 0) return ret; mutex_lock(&event_mutex); if (type == TRACE_PIDS) { filtered_pids = rcu_dereference_protected(tr->filtered_pids, lockdep_is_held(&event_mutex)); other_pids = rcu_dereference_protected(tr->filtered_no_pids, lockdep_is_held(&event_mutex)); } else { filtered_pids = rcu_dereference_protected(tr->filtered_no_pids, lockdep_is_held(&event_mutex)); other_pids = rcu_dereference_protected(tr->filtered_pids, lockdep_is_held(&event_mutex)); } ret = trace_pid_write(filtered_pids, &pid_list, ubuf, cnt); if (ret < 0) goto out; if (type == TRACE_PIDS) rcu_assign_pointer(tr->filtered_pids, pid_list); else rcu_assign_pointer(tr->filtered_no_pids, pid_list); list_for_each_entry(file, &tr->events, list) { set_bit(EVENT_FILE_FL_PID_FILTER_BIT, &file->flags); } if (filtered_pids) { tracepoint_synchronize_unregister(); trace_free_pid_list(filtered_pids); } else if (pid_list && !other_pids) { register_pid_events(tr); } /* * Ignoring of pids is done at task switch. But we have to * check for those tasks that are currently running. * Always do this in case a pid was appended or removed. */ on_each_cpu(ignore_task_cpu, tr, 1); out: mutex_unlock(&event_mutex); if (ret > 0) *ppos += ret; return ret; } static ssize_t ftrace_event_pid_write(struct file *filp, const char __user *ubuf, size_t cnt, loff_t *ppos) { return event_pid_write(filp, ubuf, cnt, ppos, TRACE_PIDS); } static ssize_t ftrace_event_npid_write(struct file *filp, const char __user *ubuf, size_t cnt, loff_t *ppos) { return event_pid_write(filp, ubuf, cnt, ppos, TRACE_NO_PIDS); } static int ftrace_event_avail_open(struct inode *inode, struct file *file); static int ftrace_event_set_open(struct inode *inode, struct file *file); static int ftrace_event_set_pid_open(struct inode *inode, struct file *file); static int ftrace_event_set_npid_open(struct inode *inode, struct file *file); static int ftrace_event_release(struct inode *inode, struct file *file); static const struct seq_operations show_event_seq_ops = { .start = t_start, .next = t_next, .show = t_show, .stop = t_stop, }; static const struct seq_operations show_set_event_seq_ops = { .start = s_start, .next = s_next, .show = t_show, .stop = t_stop, }; static const struct seq_operations show_set_pid_seq_ops = { .start = p_start, .next = p_next, .show = trace_pid_show, .stop = p_stop, }; static const struct seq_operations show_set_no_pid_seq_ops = { .start = np_start, .next = np_next, .show = trace_pid_show, .stop = p_stop, }; static const struct file_operations ftrace_avail_fops = { .open = ftrace_event_avail_open, .read = seq_read, .llseek = seq_lseek, .release = seq_release, }; static const struct file_operations ftrace_set_event_fops = { .open = ftrace_event_set_open, .read = seq_read, .write = ftrace_event_write, .llseek = seq_lseek, .release = ftrace_event_release, }; static const struct file_operations ftrace_set_event_pid_fops = { .open = ftrace_event_set_pid_open, .read = seq_read, .write = ftrace_event_pid_write, .llseek = seq_lseek, .release = ftrace_event_release, }; static const struct file_operations ftrace_set_event_notrace_pid_fops = { .open = ftrace_event_set_npid_open, .read = seq_read, .write = ftrace_event_npid_write, .llseek = seq_lseek, .release = ftrace_event_release, }; static const struct file_operations ftrace_enable_fops = { .open = tracing_open_generic, .read = event_enable_read, .write = event_enable_write, .llseek = default_llseek, }; static const struct file_operations ftrace_event_format_fops = { .open = trace_format_open, .read = seq_read, .llseek = seq_lseek, .release = seq_release, }; static const struct file_operations ftrace_event_id_fops = { .read = event_id_read, .llseek = default_llseek, }; static const struct file_operations ftrace_event_filter_fops = { .open = tracing_open_generic, .read = event_filter_read, .write = event_filter_write, .llseek = default_llseek, }; static const struct file_operations ftrace_subsystem_filter_fops = { .open = subsystem_open, .read = subsystem_filter_read, .write = subsystem_filter_write, .llseek = default_llseek, .release = subsystem_release, }; static const struct file_operations ftrace_system_enable_fops = { .open = subsystem_open, .read = system_enable_read, .write = system_enable_write, .llseek = default_llseek, .release = subsystem_release, }; static const struct file_operations ftrace_tr_enable_fops = { .open = system_tr_open, .read = system_enable_read, .write = system_enable_write, .llseek = default_llseek, .release = subsystem_release, }; static const struct file_operations ftrace_show_header_fops = { .open = tracing_open_generic, .read = show_header, .llseek = default_llseek, }; static int ftrace_event_open(struct inode *inode, struct file *file, const struct seq_operations *seq_ops) { struct seq_file *m; int ret; ret = security_locked_down(LOCKDOWN_TRACEFS); if (ret) return ret; ret = seq_open(file, seq_ops); if (ret < 0) return ret; m = file->private_data; /* copy tr over to seq ops */ m->private = inode->i_private; return ret; } static int ftrace_event_release(struct inode *inode, struct file *file) { struct trace_array *tr = inode->i_private; trace_array_put(tr); return seq_release(inode, file); } static int ftrace_event_avail_open(struct inode *inode, struct file *file) { const struct seq_operations *seq_ops = &show_event_seq_ops; /* Checks for tracefs lockdown */ return ftrace_event_open(inode, file, seq_ops); } static int ftrace_event_set_open(struct inode *inode, struct file *file) { const struct seq_operations *seq_ops = &show_set_event_seq_ops; struct trace_array *tr = inode->i_private; int ret; ret = tracing_check_open_get_tr(tr); if (ret) return ret; if ((file->f_mode & FMODE_WRITE) && (file->f_flags & O_TRUNC)) ftrace_clear_events(tr); ret = ftrace_event_open(inode, file, seq_ops); if (ret < 0) trace_array_put(tr); return ret; } static int ftrace_event_set_pid_open(struct inode *inode, struct file *file) { const struct seq_operations *seq_ops = &show_set_pid_seq_ops; struct trace_array *tr = inode->i_private; int ret; ret = tracing_check_open_get_tr(tr); if (ret) return ret; if ((file->f_mode & FMODE_WRITE) && (file->f_flags & O_TRUNC)) ftrace_clear_event_pids(tr, TRACE_PIDS); ret = ftrace_event_open(inode, file, seq_ops); if (ret < 0) trace_array_put(tr); return ret; } static int ftrace_event_set_npid_open(struct inode *inode, struct file *file) { const struct seq_operations *seq_ops = &show_set_no_pid_seq_ops; struct trace_array *tr = inode->i_private; int ret; ret = tracing_check_open_get_tr(tr); if (ret) return ret; if ((file->f_mode & FMODE_WRITE) && (file->f_flags & O_TRUNC)) ftrace_clear_event_pids(tr, TRACE_NO_PIDS); ret = ftrace_event_open(inode, file, seq_ops); if (ret < 0) trace_array_put(tr); return ret; } static struct event_subsystem * create_new_subsystem(const char *name) { struct event_subsystem *system; /* need to create new entry */ system = kmalloc(sizeof(*system), GFP_KERNEL); if (!system) return NULL; system->ref_count = 1; /* Only allocate if dynamic (kprobes and modules) */ system->name = kstrdup_const(name, GFP_KERNEL); if (!system->name) goto out_free; system->filter = NULL; system->filter = kzalloc(sizeof(struct event_filter), GFP_KERNEL); if (!system->filter) goto out_free; list_add(&system->list, &event_subsystems); return system; out_free: kfree_const(system->name); kfree(system); return NULL; } static struct dentry * event_subsystem_dir(struct trace_array *tr, const char *name, struct trace_event_file *file, struct dentry *parent) { struct trace_subsystem_dir *dir; struct event_subsystem *system; struct dentry *entry; /* First see if we did not already create this dir */ list_for_each_entry(dir, &tr->systems, list) { system = dir->subsystem; if (strcmp(system->name, name) == 0) { dir->nr_events++; file->system = dir; return dir->entry; } } /* Now see if the system itself exists. */ list_for_each_entry(system, &event_subsystems, list) { if (strcmp(system->name, name) == 0) break; } /* Reset system variable when not found */ if (&system->list == &event_subsystems) system = NULL; dir = kmalloc(sizeof(*dir), GFP_KERNEL); if (!dir) goto out_fail; if (!system) { system = create_new_subsystem(name); if (!system) goto out_free; } else __get_system(system); dir->entry = tracefs_create_dir(name, parent); if (!dir->entry) { pr_warn("Failed to create system directory %s\n", name); __put_system(system); goto out_free; } dir->tr = tr; dir->ref_count = 1; dir->nr_events = 1; dir->subsystem = system; file->system = dir; entry = tracefs_create_file("filter", 0644, dir->entry, dir, &ftrace_subsystem_filter_fops); if (!entry) { kfree(system->filter); system->filter = NULL; pr_warn("Could not create tracefs '%s/filter' entry\n", name); } trace_create_file("enable", 0644, dir->entry, dir, &ftrace_system_enable_fops); list_add(&dir->list, &tr->systems); return dir->entry; out_free: kfree(dir); out_fail: /* Only print this message if failed on memory allocation */ if (!dir || !system) pr_warn("No memory to create event subsystem %s\n", name); return NULL; } static int event_define_fields(struct trace_event_call *call) { struct list_head *head; int ret = 0; /* * Other events may have the same class. Only update * the fields if they are not already defined. */ head = trace_get_fields(call); if (list_empty(head)) { struct trace_event_fields *field = call->class->fields_array; unsigned int offset = sizeof(struct trace_entry); for (; field->type; field++) { if (field->type == TRACE_FUNCTION_TYPE) { field->define_fields(call); break; } offset = ALIGN(offset, field->align); ret = trace_define_field(call, field->type, field->name, offset, field->size, field->is_signed, field->filter_type); if (WARN_ON_ONCE(ret)) { pr_err("error code is %d\n", ret); break; } offset += field->size; } } return ret; } static int event_create_dir(struct dentry *parent, struct trace_event_file *file) { struct trace_event_call *call = file->event_call; struct trace_array *tr = file->tr; struct dentry *d_events; const char *name; int ret; /* * If the trace point header did not define TRACE_SYSTEM * then the system would be called "TRACE_SYSTEM". */ if (strcmp(call->class->system, TRACE_SYSTEM) != 0) { d_events = event_subsystem_dir(tr, call->class->system, file, parent); if (!d_events) return -ENOMEM; } else d_events = parent; name = trace_event_name(call); file->dir = tracefs_create_dir(name, d_events); if (!file->dir) { pr_warn("Could not create tracefs '%s' directory\n", name); return -1; } if (call->class->reg && !(call->flags & TRACE_EVENT_FL_IGNORE_ENABLE)) trace_create_file("enable", 0644, file->dir, file, &ftrace_enable_fops); #ifdef CONFIG_PERF_EVENTS if (call->event.type && call->class->reg) trace_create_file("id", 0444, file->dir, (void *)(long)call->event.type, &ftrace_event_id_fops); #endif ret = event_define_fields(call); if (ret < 0) { pr_warn("Could not initialize trace point events/%s\n", name); return ret; } /* * Only event directories that can be enabled should have * triggers or filters. */ if (!(call->flags & TRACE_EVENT_FL_IGNORE_ENABLE)) { trace_create_file("filter", 0644, file->dir, file, &ftrace_event_filter_fops); trace_create_file("trigger", 0644, file->dir, file, &event_trigger_fops); } #ifdef CONFIG_HIST_TRIGGERS trace_create_file("hist", 0444, file->dir, file, &event_hist_fops); #endif #ifdef CONFIG_HIST_TRIGGERS_DEBUG trace_create_file("hist_debug", 0444, file->dir, file, &event_hist_debug_fops); #endif trace_create_file("format", 0444, file->dir, call, &ftrace_event_format_fops); #ifdef CONFIG_TRACE_EVENT_INJECT if (call->event.type && call->class->reg) trace_create_file("inject", 0200, file->dir, file, &event_inject_fops); #endif return 0; } static void remove_event_from_tracers(struct trace_event_call *call) { struct trace_event_file *file; struct trace_array *tr; do_for_each_event_file_safe(tr, file) { if (file->event_call != call) continue; remove_event_file_dir(file); /* * The do_for_each_event_file_safe() is * a double loop. After finding the call for this * trace_array, we use break to jump to the next * trace_array. */ break; } while_for_each_event_file(); } static void event_remove(struct trace_event_call *call) { struct trace_array *tr; struct trace_event_file *file; do_for_each_event_file(tr, file) { if (file->event_call != call) continue; if (file->flags & EVENT_FILE_FL_WAS_ENABLED) tr->clear_trace = true; ftrace_event_enable_disable(file, 0); /* * The do_for_each_event_file() is * a double loop. After finding the call for this * trace_array, we use break to jump to the next * trace_array. */ break; } while_for_each_event_file(); if (call->event.funcs) __unregister_trace_event(&call->event); remove_event_from_tracers(call); list_del(&call->list); } static int event_init(struct trace_event_call *call) { int ret = 0; const char *name; name = trace_event_name(call); if (WARN_ON(!name)) return -EINVAL; if (call->class->raw_init) { ret = call->class->raw_init(call); if (ret < 0 && ret != -ENOSYS) pr_warn("Could not initialize trace events/%s\n", name); } return ret; } static int __register_event(struct trace_event_call *call, struct module *mod) { int ret; ret = event_init(call); if (ret < 0) return ret; list_add(&call->list, &ftrace_events); call->mod = mod; return 0; } static char *eval_replace(char *ptr, struct trace_eval_map *map, int len) { int rlen; int elen; /* Find the length of the eval value as a string */ elen = snprintf(ptr, 0, "%ld", map->eval_value); /* Make sure there's enough room to replace the string with the value */ if (len < elen) return NULL; snprintf(ptr, elen + 1, "%ld", map->eval_value); /* Get the rest of the string of ptr */ rlen = strlen(ptr + len); memmove(ptr + elen, ptr + len, rlen); /* Make sure we end the new string */ ptr[elen + rlen] = 0; return ptr + elen; } static void update_event_printk(struct trace_event_call *call, struct trace_eval_map *map) { char *ptr; int quote = 0; int len = strlen(map->eval_string); for (ptr = call->print_fmt; *ptr; ptr++) { if (*ptr == '\\') { ptr++; /* paranoid */ if (!*ptr) break; continue; } if (*ptr == '"') { quote ^= 1; continue; } if (quote) continue; if (isdigit(*ptr)) { /* skip numbers */ do { ptr++; /* Check for alpha chars like ULL */ } while (isalnum(*ptr)); if (!*ptr) break; /* * A number must have some kind of delimiter after * it, and we can ignore that too. */ continue; } if (isalpha(*ptr) || *ptr == '_') { if (strncmp(map->eval_string, ptr, len) == 0 && !isalnum(ptr[len]) && ptr[len] != '_') { ptr = eval_replace(ptr, map, len); /* enum/sizeof string smaller than value */ if (WARN_ON_ONCE(!ptr)) return; /* * No need to decrement here, as eval_replace() * returns the pointer to the character passed * the eval, and two evals can not be placed * back to back without something in between. * We can skip that something in between. */ continue; } skip_more: do { ptr++; } while (isalnum(*ptr) || *ptr == '_'); if (!*ptr) break; /* * If what comes after this variable is a '.' or * '->' then we can continue to ignore that string. */ if (*ptr == '.' || (ptr[0] == '-' && ptr[1] == '>')) { ptr += *ptr == '.' ? 1 : 2; if (!*ptr) break; goto skip_more; } /* * Once again, we can skip the delimiter that came * after the string. */ continue; } } } void trace_event_eval_update(struct trace_eval_map **map, int len) { struct trace_event_call *call, *p; const char *last_system = NULL; bool first = false; int last_i; int i; down_write(&trace_event_sem); list_for_each_entry_safe(call, p, &ftrace_events, list) { /* events are usually grouped together with systems */ if (!last_system || call->class->system != last_system) { first = true; last_i = 0; last_system = call->class->system; } /* * Since calls are grouped by systems, the likelyhood that the * next call in the iteration belongs to the same system as the * previous call is high. As an optimization, we skip seaching * for a map[] that matches the call's system if the last call * was from the same system. That's what last_i is for. If the * call has the same system as the previous call, then last_i * will be the index of the first map[] that has a matching * system. */ for (i = last_i; i < len; i++) { if (call->class->system == map[i]->system) { /* Save the first system if need be */ if (first) { last_i = i; first = false; } update_event_printk(call, map[i]); } } } up_write(&trace_event_sem); } static struct trace_event_file * trace_create_new_event(struct trace_event_call *call, struct trace_array *tr) { struct trace_event_file *file; file = kmem_cache_alloc(file_cachep, GFP_TRACE); if (!file) return NULL; file->event_call = call; file->tr = tr; atomic_set(&file->sm_ref, 0); atomic_set(&file->tm_ref, 0); INIT_LIST_HEAD(&file->triggers); list_add(&file->list, &tr->events); return file; } /* Add an event to a trace directory */ static int __trace_add_new_event(struct trace_event_call *call, struct trace_array *tr) { struct trace_event_file *file; file = trace_create_new_event(call, tr); if (!file) return -ENOMEM; if (eventdir_initialized) return event_create_dir(tr->event_dir, file); else return event_define_fields(call); } /* * Just create a decriptor for early init. A descriptor is required * for enabling events at boot. We want to enable events before * the filesystem is initialized. */ static int __trace_early_add_new_event(struct trace_event_call *call, struct trace_array *tr) { struct trace_event_file *file; file = trace_create_new_event(call, tr); if (!file) return -ENOMEM; return event_define_fields(call); } struct ftrace_module_file_ops; static void __add_event_to_tracers(struct trace_event_call *call); /* Add an additional event_call dynamically */ int trace_add_event_call(struct trace_event_call *call) { int ret; lockdep_assert_held(&event_mutex); mutex_lock(&trace_types_lock); ret = __register_event(call, NULL); if (ret >= 0) __add_event_to_tracers(call); mutex_unlock(&trace_types_lock); return ret; } /* * Must be called under locking of trace_types_lock, event_mutex and * trace_event_sem. */ static void __trace_remove_event_call(struct trace_event_call *call) { event_remove(call); trace_destroy_fields(call); free_event_filter(call->filter); call->filter = NULL; } static int probe_remove_event_call(struct trace_event_call *call) { struct trace_array *tr; struct trace_event_file *file; #ifdef CONFIG_PERF_EVENTS if (call->perf_refcount) return -EBUSY; #endif do_for_each_event_file(tr, file) { if (file->event_call != call) continue; /* * We can't rely on ftrace_event_enable_disable(enable => 0) * we are going to do, EVENT_FILE_FL_SOFT_MODE can suppress * TRACE_REG_UNREGISTER. */ if (file->flags & EVENT_FILE_FL_ENABLED) return -EBUSY; /* * The do_for_each_event_file_safe() is * a double loop. After finding the call for this * trace_array, we use break to jump to the next * trace_array. */ break; } while_for_each_event_file(); __trace_remove_event_call(call); return 0; } /* Remove an event_call */ int trace_remove_event_call(struct trace_event_call *call) { int ret; lockdep_assert_held(&event_mutex); mutex_lock(&trace_types_lock); down_write(&trace_event_sem); ret = probe_remove_event_call(call); up_write(&trace_event_sem); mutex_unlock(&trace_types_lock); return ret; } #define for_each_event(event, start, end) \ for (event = start; \ (unsigned long)event < (unsigned long)end; \ event++) #ifdef CONFIG_MODULES static void trace_module_add_events(struct module *mod) { struct trace_event_call **call, **start, **end; if (!mod->num_trace_events) return; /* Don't add infrastructure for mods without tracepoints */ if (trace_module_has_bad_taint(mod)) { pr_err("%s: module has bad taint, not creating trace events\n", mod->name); return; } start = mod->trace_events; end = mod->trace_events + mod->num_trace_events; for_each_event(call, start, end) { __register_event(*call, mod); __add_event_to_tracers(*call); } } static void trace_module_remove_events(struct module *mod) { struct trace_event_call *call, *p; down_write(&trace_event_sem); list_for_each_entry_safe(call, p, &ftrace_events, list) { if (call->mod == mod) __trace_remove_event_call(call); } up_write(&trace_event_sem); /* * It is safest to reset the ring buffer if the module being unloaded * registered any events that were used. The only worry is if * a new module gets loaded, and takes on the same id as the events * of this module. When printing out the buffer, traced events left * over from this module may be passed to the new module events and * unexpected results may occur. */ tracing_reset_all_online_cpus(); } static int trace_module_notify(struct notifier_block *self, unsigned long val, void *data) { struct module *mod = data; mutex_lock(&event_mutex); mutex_lock(&trace_types_lock); switch (val) { case MODULE_STATE_COMING: trace_module_add_events(mod); break; case MODULE_STATE_GOING: trace_module_remove_events(mod); break; } mutex_unlock(&trace_types_lock); mutex_unlock(&event_mutex); return NOTIFY_OK; } static struct notifier_block trace_module_nb = { .notifier_call = trace_module_notify, .priority = 1, /* higher than trace.c module notify */ }; #endif /* CONFIG_MODULES */ /* Create a new event directory structure for a trace directory. */ static void __trace_add_event_dirs(struct trace_array *tr) { struct trace_event_call *call; int ret; list_for_each_entry(call, &ftrace_events, list) { ret = __trace_add_new_event(call, tr); if (ret < 0) pr_warn("Could not create directory for event %s\n", trace_event_name(call)); } } /* Returns any file that matches the system and event */ struct trace_event_file * __find_event_file(struct trace_array *tr, const char *system, const char *event) { struct trace_event_file *file; struct trace_event_call *call; const char *name; list_for_each_entry(file, &tr->events, list) { call = file->event_call; name = trace_event_name(call); if (!name || !call->class) continue; if (strcmp(event, name) == 0 && strcmp(system, call->class->system) == 0) return file; } return NULL; } /* Returns valid trace event files that match system and event */ struct trace_event_file * find_event_file(struct trace_array *tr, const char *system, const char *event) { struct trace_event_file *file; file = __find_event_file(tr, system, event); if (!file || !file->event_call->class->reg || file->event_call->flags & TRACE_EVENT_FL_IGNORE_ENABLE) return NULL; return file; } /** * trace_get_event_file - Find and return a trace event file * @instance: The name of the trace instance containing the event * @system: The name of the system containing the event * @event: The name of the event * * Return a trace event file given the trace instance name, trace * system, and trace event name. If the instance name is NULL, it * refers to the top-level trace array. * * This function will look it up and return it if found, after calling * trace_array_get() to prevent the instance from going away, and * increment the event's module refcount to prevent it from being * removed. * * To release the file, call trace_put_event_file(), which will call * trace_array_put() and decrement the event's module refcount. * * Return: The trace event on success, ERR_PTR otherwise. */ struct trace_event_file *trace_get_event_file(const char *instance, const char *system, const char *event) { struct trace_array *tr = top_trace_array(); struct trace_event_file *file = NULL; int ret = -EINVAL; if (instance) { tr = trace_array_find_get(instance); if (!tr) return ERR_PTR(-ENOENT); } else { ret = trace_array_get(tr); if (ret) return ERR_PTR(ret); } mutex_lock(&event_mutex); file = find_event_file(tr, system, event); if (!file) { trace_array_put(tr); ret = -EINVAL; goto out; } /* Don't let event modules unload while in use */ ret = try_module_get(file->event_call->mod); if (!ret) { trace_array_put(tr); ret = -EBUSY; goto out; } ret = 0; out: mutex_unlock(&event_mutex); if (ret) file = ERR_PTR(ret); return file; } EXPORT_SYMBOL_GPL(trace_get_event_file); /** * trace_put_event_file - Release a file from trace_get_event_file() * @file: The trace event file * * If a file was retrieved using trace_get_event_file(), this should * be called when it's no longer needed. It will cancel the previous * trace_array_get() called by that function, and decrement the * event's module refcount. */ void trace_put_event_file(struct trace_event_file *file) { mutex_lock(&event_mutex); module_put(file->event_call->mod); mutex_unlock(&event_mutex); trace_array_put(file->tr); } EXPORT_SYMBOL_GPL(trace_put_event_file); #ifdef CONFIG_DYNAMIC_FTRACE /* Avoid typos */ #define ENABLE_EVENT_STR "enable_event" #define DISABLE_EVENT_STR "disable_event" struct event_probe_data { struct trace_event_file *file; unsigned long count; int ref; bool enable; }; static void update_event_probe(struct event_probe_data *data) { if (data->enable) clear_bit(EVENT_FILE_FL_SOFT_DISABLED_BIT, &data->file->flags); else set_bit(EVENT_FILE_FL_SOFT_DISABLED_BIT, &data->file->flags); } static void event_enable_probe(unsigned long ip, unsigned long parent_ip, struct trace_array *tr, struct ftrace_probe_ops *ops, void *data) { struct ftrace_func_mapper *mapper = data; struct event_probe_data *edata; void **pdata; pdata = ftrace_func_mapper_find_ip(mapper, ip); if (!pdata || !*pdata) return; edata = *pdata; update_event_probe(edata); } static void event_enable_count_probe(unsigned long ip, unsigned long parent_ip, struct trace_array *tr, struct ftrace_probe_ops *ops, void *data) { struct ftrace_func_mapper *mapper = data; struct event_probe_data *edata; void **pdata; pdata = ftrace_func_mapper_find_ip(mapper, ip); if (!pdata || !*pdata) return; edata = *pdata; if (!edata->count) return; /* Skip if the event is in a state we want to switch to */ if (edata->enable == !(edata->file->flags & EVENT_FILE_FL_SOFT_DISABLED)) return; if (edata->count != -1) (edata->count)--; update_event_probe(edata); } static int event_enable_print(struct seq_file *m, unsigned long ip, struct ftrace_probe_ops *ops, void *data) { struct ftrace_func_mapper *mapper = data; struct event_probe_data *edata; void **pdata; pdata = ftrace_func_mapper_find_ip(mapper, ip); if (WARN_ON_ONCE(!pdata || !*pdata)) return 0; edata = *pdata; seq_printf(m, "%ps:", (void *)ip); seq_printf(m, "%s:%s:%s", edata->enable ? ENABLE_EVENT_STR : DISABLE_EVENT_STR, edata->file->event_call->class->system, trace_event_name(edata->file->event_call)); if (edata->count == -1) seq_puts(m, ":unlimited\n"); else seq_printf(m, ":count=%ld\n", edata->count); return 0; } static int event_enable_init(struct ftrace_probe_ops *ops, struct trace_array *tr, unsigned long ip, void *init_data, void **data) { struct ftrace_func_mapper *mapper = *data; struct event_probe_data *edata = init_data; int ret; if (!mapper) { mapper = allocate_ftrace_func_mapper(); if (!mapper) return -ENODEV; *data = mapper; } ret = ftrace_func_mapper_add_ip(mapper, ip, edata); if (ret < 0) return ret; edata->ref++; return 0; } static int free_probe_data(void *data) { struct event_probe_data *edata = data; edata->ref--; if (!edata->ref) { /* Remove the SOFT_MODE flag */ __ftrace_event_enable_disable(edata->file, 0, 1); module_put(edata->file->event_call->mod); kfree(edata); } return 0; } static void event_enable_free(struct ftrace_probe_ops *ops, struct trace_array *tr, unsigned long ip, void *data) { struct ftrace_func_mapper *mapper = data; struct event_probe_data *edata; if (!ip) { if (!mapper) return; free_ftrace_func_mapper(mapper, free_probe_data); return; } edata = ftrace_func_mapper_remove_ip(mapper, ip); if (WARN_ON_ONCE(!edata)) return; if (WARN_ON_ONCE(edata->ref <= 0)) return; free_probe_data(edata); } static struct ftrace_probe_ops event_enable_probe_ops = { .func = event_enable_probe, .print = event_enable_print, .init = event_enable_init, .free = event_enable_free, }; static struct ftrace_probe_ops event_enable_count_probe_ops = { .func = event_enable_count_probe, .print = event_enable_print, .init = event_enable_init, .free = event_enable_free, }; static struct ftrace_probe_ops event_disable_probe_ops = { .func = event_enable_probe, .print = event_enable_print, .init = event_enable_init, .free = event_enable_free, }; static struct ftrace_probe_ops event_disable_count_probe_ops = { .func = event_enable_count_probe, .print = event_enable_print, .init = event_enable_init, .free = event_enable_free, }; static int event_enable_func(struct trace_array *tr, struct ftrace_hash *hash, char *glob, char *cmd, char *param, int enabled) { struct trace_event_file *file; struct ftrace_probe_ops *ops; struct event_probe_data *data; const char *system; const char *event; char *number; bool enable; int ret; if (!tr) return -ENODEV; /* hash funcs only work with set_ftrace_filter */ if (!enabled || !param) return -EINVAL; system = strsep(&param, ":"); if (!param) return -EINVAL; event = strsep(&param, ":"); mutex_lock(&event_mutex); ret = -EINVAL; file = find_event_file(tr, system, event); if (!file) goto out; enable = strcmp(cmd, ENABLE_EVENT_STR) == 0; if (enable) ops = param ? &event_enable_count_probe_ops : &event_enable_probe_ops; else ops = param ? &event_disable_count_probe_ops : &event_disable_probe_ops; if (glob[0] == '!') { ret = unregister_ftrace_function_probe_func(glob+1, tr, ops); goto out; } ret = -ENOMEM; data = kzalloc(sizeof(*data), GFP_KERNEL); if (!data) goto out; data->enable = enable; data->count = -1; data->file = file; if (!param) goto out_reg; number = strsep(&param, ":"); ret = -EINVAL; if (!strlen(number)) goto out_free; /* * We use the callback data field (which is a pointer) * as our counter. */ ret = kstrtoul(number, 0, &data->count); if (ret) goto out_free; out_reg: /* Don't let event modules unload while probe registered */ ret = try_module_get(file->event_call->mod); if (!ret) { ret = -EBUSY; goto out_free; } ret = __ftrace_event_enable_disable(file, 1, 1); if (ret < 0) goto out_put; ret = register_ftrace_function_probe(glob, tr, ops, data); /* * The above returns on success the # of functions enabled, * but if it didn't find any functions it returns zero. * Consider no functions a failure too. */ if (!ret) { ret = -ENOENT; goto out_disable; } else if (ret < 0) goto out_disable; /* Just return zero, not the number of enabled functions */ ret = 0; out: mutex_unlock(&event_mutex); return ret; out_disable: __ftrace_event_enable_disable(file, 0, 1); out_put: module_put(file->event_call->mod); out_free: kfree(data); goto out; } static struct ftrace_func_command event_enable_cmd = { .name = ENABLE_EVENT_STR, .func = event_enable_func, }; static struct ftrace_func_command event_disable_cmd = { .name = DISABLE_EVENT_STR, .func = event_enable_func, }; static __init int register_event_cmds(void) { int ret; ret = register_ftrace_command(&event_enable_cmd); if (WARN_ON(ret < 0)) return ret; ret = register_ftrace_command(&event_disable_cmd); if (WARN_ON(ret < 0)) unregister_ftrace_command(&event_enable_cmd); return ret; } #else static inline int register_event_cmds(void) { return 0; } #endif /* CONFIG_DYNAMIC_FTRACE */ /* * The top level array and trace arrays created by boot-time tracing * have already had its trace_event_file descriptors created in order * to allow for early events to be recorded. * This function is called after the tracefs has been initialized, * and we now have to create the files associated to the events. */ static void __trace_early_add_event_dirs(struct trace_array *tr) { struct trace_event_file *file; int ret; list_for_each_entry(file, &tr->events, list) { ret = event_create_dir(tr->event_dir, file); if (ret < 0) pr_warn("Could not create directory for event %s\n", trace_event_name(file->event_call)); } } /* * For early boot up, the top trace array and the trace arrays created * by boot-time tracing require to have a list of events that can be * enabled. This must be done before the filesystem is set up in order * to allow events to be traced early. */ void __trace_early_add_events(struct trace_array *tr) { struct trace_event_call *call; int ret; list_for_each_entry(call, &ftrace_events, list) { /* Early boot up should not have any modules loaded */ if (WARN_ON_ONCE(call->mod)) continue; ret = __trace_early_add_new_event(call, tr); if (ret < 0) pr_warn("Could not create early event %s\n", trace_event_name(call)); } } /* Remove the event directory structure for a trace directory. */ static void __trace_remove_event_dirs(struct trace_array *tr) { struct trace_event_file *file, *next; list_for_each_entry_safe(file, next, &tr->events, list) remove_event_file_dir(file); } static void __add_event_to_tracers(struct trace_event_call *call) { struct trace_array *tr; list_for_each_entry(tr, &ftrace_trace_arrays, list) __trace_add_new_event(call, tr); } extern struct trace_event_call *__start_ftrace_events[]; extern struct trace_event_call *__stop_ftrace_events[]; static char bootup_event_buf[COMMAND_LINE_SIZE] __initdata; static __init int setup_trace_event(char *str) { strlcpy(bootup_event_buf, str, COMMAND_LINE_SIZE); ring_buffer_expanded = true; disable_tracing_selftest("running event tracing"); return 1; } __setup("trace_event=", setup_trace_event); /* Expects to have event_mutex held when called */ static int create_event_toplevel_files(struct dentry *parent, struct trace_array *tr) { struct dentry *d_events; struct dentry *entry; entry = tracefs_create_file("set_event", 0644, parent, tr, &ftrace_set_event_fops); if (!entry) { pr_warn("Could not create tracefs 'set_event' entry\n"); return -ENOMEM; } d_events = tracefs_create_dir("events", parent); if (!d_events) { pr_warn("Could not create tracefs 'events' directory\n"); return -ENOMEM; } entry = trace_create_file("enable", 0644, d_events, tr, &ftrace_tr_enable_fops); if (!entry) { pr_warn("Could not create tracefs 'enable' entry\n"); return -ENOMEM; } /* There are not as crucial, just warn if they are not created */ entry = tracefs_create_file("set_event_pid", 0644, parent, tr, &ftrace_set_event_pid_fops); if (!entry) pr_warn("Could not create tracefs 'set_event_pid' entry\n"); entry = tracefs_create_file("set_event_notrace_pid", 0644, parent, tr, &ftrace_set_event_notrace_pid_fops); if (!entry) pr_warn("Could not create tracefs 'set_event_notrace_pid' entry\n"); /* ring buffer internal formats */ entry = trace_create_file("header_page", 0444, d_events, ring_buffer_print_page_header, &ftrace_show_header_fops); if (!entry) pr_warn("Could not create tracefs 'header_page' entry\n"); entry = trace_create_file("header_event", 0444, d_events, ring_buffer_print_entry_header, &ftrace_show_header_fops); if (!entry) pr_warn("Could not create tracefs 'header_event' entry\n"); tr->event_dir = d_events; return 0; } /** * event_trace_add_tracer - add a instance of a trace_array to events * @parent: The parent dentry to place the files/directories for events in * @tr: The trace array associated with these events * * When a new instance is created, it needs to set up its events * directory, as well as other files associated with events. It also * creates the event hierachry in the @parent/events directory. * * Returns 0 on success. * * Must be called with event_mutex held. */ int event_trace_add_tracer(struct dentry *parent, struct trace_array *tr) { int ret; lockdep_assert_held(&event_mutex); ret = create_event_toplevel_files(parent, tr); if (ret) goto out; down_write(&trace_event_sem); /* If tr already has the event list, it is initialized in early boot. */ if (unlikely(!list_empty(&tr->events))) __trace_early_add_event_dirs(tr); else __trace_add_event_dirs(tr); up_write(&trace_event_sem); out: return ret; } /* * The top trace array already had its file descriptors created. * Now the files themselves need to be created. */ static __init int early_event_add_tracer(struct dentry *parent, struct trace_array *tr) { int ret; mutex_lock(&event_mutex); ret = create_event_toplevel_files(parent, tr); if (ret) goto out_unlock; down_write(&trace_event_sem); __trace_early_add_event_dirs(tr); up_write(&trace_event_sem); out_unlock: mutex_unlock(&event_mutex); return ret; } /* Must be called with event_mutex held */ int event_trace_del_tracer(struct trace_array *tr) { lockdep_assert_held(&event_mutex); /* Disable any event triggers and associated soft-disabled events */ clear_event_triggers(tr); /* Clear the pid list */ __ftrace_clear_event_pids(tr, TRACE_PIDS | TRACE_NO_PIDS); /* Disable any running events */ __ftrace_set_clr_event_nolock(tr, NULL, NULL, NULL, 0); /* Make sure no more events are being executed */ tracepoint_synchronize_unregister(); down_write(&trace_event_sem); __trace_remove_event_dirs(tr); tracefs_remove(tr->event_dir); up_write(&trace_event_sem); tr->event_dir = NULL; return 0; } static __init int event_trace_memsetup(void) { field_cachep = KMEM_CACHE(ftrace_event_field, SLAB_PANIC); file_cachep = KMEM_CACHE(trace_event_file, SLAB_PANIC); return 0; } static __init void early_enable_events(struct trace_array *tr, bool disable_first) { char *buf = bootup_event_buf; char *token; int ret; while (true) { token = strsep(&buf, ","); if (!token) break; if (*token) { /* Restarting syscalls requires that we stop them first */ if (disable_first) ftrace_set_clr_event(tr, token, 0); ret = ftrace_set_clr_event(tr, token, 1); if (ret) pr_warn("Failed to enable trace event: %s\n", token); } /* Put back the comma to allow this to be called again */ if (buf) *(buf - 1) = ','; } } static __init int event_trace_enable(void) { struct trace_array *tr = top_trace_array(); struct trace_event_call **iter, *call; int ret; if (!tr) return -ENODEV; for_each_event(iter, __start_ftrace_events, __stop_ftrace_events) { call = *iter; ret = event_init(call); if (!ret) list_add(&call->list, &ftrace_events); } /* * We need the top trace array to have a working set of trace * points at early init, before the debug files and directories * are created. Create the file entries now, and attach them * to the actual file dentries later. */ __trace_early_add_events(tr); early_enable_events(tr, false); trace_printk_start_comm(); register_event_cmds(); register_trigger_cmds(); return 0; } /* * event_trace_enable() is called from trace_event_init() first to * initialize events and perhaps start any events that are on the * command line. Unfortunately, there are some events that will not * start this early, like the system call tracepoints that need * to set the TIF_SYSCALL_TRACEPOINT flag of pid 1. But event_trace_enable() * is called before pid 1 starts, and this flag is never set, making * the syscall tracepoint never get reached, but the event is enabled * regardless (and not doing anything). */ static __init int event_trace_enable_again(void) { struct trace_array *tr; tr = top_trace_array(); if (!tr) return -ENODEV; early_enable_events(tr, true); return 0; } early_initcall(event_trace_enable_again); /* Init fields which doesn't related to the tracefs */ static __init int event_trace_init_fields(void) { if (trace_define_generic_fields()) pr_warn("tracing: Failed to allocated generic fields"); if (trace_define_common_fields()) pr_warn("tracing: Failed to allocate common fields"); return 0; } __init int event_trace_init(void) { struct trace_array *tr; struct dentry *entry; int ret; tr = top_trace_array(); if (!tr) return -ENODEV; entry = tracefs_create_file("available_events", 0444, NULL, tr, &ftrace_avail_fops); if (!entry) pr_warn("Could not create tracefs 'available_events' entry\n"); ret = early_event_add_tracer(NULL, tr); if (ret) return ret; #ifdef CONFIG_MODULES ret = register_module_notifier(&trace_module_nb); if (ret) pr_warn("Failed to register trace events module notifier\n"); #endif eventdir_initialized = true; return 0; } void __init trace_event_init(void) { event_trace_memsetup(); init_ftrace_syscalls(); event_trace_enable(); event_trace_init_fields(); } #ifdef CONFIG_EVENT_TRACE_STARTUP_TEST static DEFINE_SPINLOCK(test_spinlock); static DEFINE_SPINLOCK(test_spinlock_irq); static DEFINE_MUTEX(test_mutex); static __init void test_work(struct work_struct *dummy) { spin_lock(&test_spinlock); spin_lock_irq(&test_spinlock_irq); udelay(1); spin_unlock_irq(&test_spinlock_irq); spin_unlock(&test_spinlock); mutex_lock(&test_mutex); msleep(1); mutex_unlock(&test_mutex); } static __init int event_test_thread(void *unused) { void *test_malloc; test_malloc = kmalloc(1234, GFP_KERNEL); if (!test_malloc) pr_info("failed to kmalloc\n"); schedule_on_each_cpu(test_work); kfree(test_malloc); set_current_state(TASK_INTERRUPTIBLE); while (!kthread_should_stop()) { schedule(); set_current_state(TASK_INTERRUPTIBLE); } __set_current_state(TASK_RUNNING); return 0; } /* * Do various things that may trigger events. */ static __init void event_test_stuff(void) { struct task_struct *test_thread; test_thread = kthread_run(event_test_thread, NULL, "test-events"); msleep(1); kthread_stop(test_thread); } /* * For every trace event defined, we will test each trace point separately, * and then by groups, and finally all trace points. */ static __init void event_trace_self_tests(void) { struct trace_subsystem_dir *dir; struct trace_event_file *file; struct trace_event_call *call; struct event_subsystem *system; struct trace_array *tr; int ret; tr = top_trace_array(); if (!tr) return; pr_info("Running tests on trace events:\n"); list_for_each_entry(file, &tr->events, list) { call = file->event_call; /* Only test those that have a probe */ if (!call->class || !call->class->probe) continue; /* * Testing syscall events here is pretty useless, but * we still do it if configured. But this is time consuming. * What we really need is a user thread to perform the * syscalls as we test. */ #ifndef CONFIG_EVENT_TRACE_TEST_SYSCALLS if (call->class->system && strcmp(call->class->system, "syscalls") == 0) continue; #endif pr_info("Testing event %s: ", trace_event_name(call)); /* * If an event is already enabled, someone is using * it and the self test should not be on. */ if (file->flags & EVENT_FILE_FL_ENABLED) { pr_warn("Enabled event during self test!\n"); WARN_ON_ONCE(1); continue; } ftrace_event_enable_disable(file, 1); event_test_stuff(); ftrace_event_enable_disable(file, 0); pr_cont("OK\n"); } /* Now test at the sub system level */ pr_info("Running tests on trace event systems:\n"); list_for_each_entry(dir, &tr->systems, list) { system = dir->subsystem; /* the ftrace system is special, skip it */ if (strcmp(system->name, "ftrace") == 0) continue; pr_info("Testing event system %s: ", system->name); ret = __ftrace_set_clr_event(tr, NULL, system->name, NULL, 1); if (WARN_ON_ONCE(ret)) { pr_warn("error enabling system %s\n", system->name); continue; } event_test_stuff(); ret = __ftrace_set_clr_event(tr, NULL, system->name, NULL, 0); if (WARN_ON_ONCE(ret)) { pr_warn("error disabling system %s\n", system->name); continue; } pr_cont("OK\n"); } /* Test with all events enabled */ pr_info("Running tests on all trace events:\n"); pr_info("Testing all events: "); ret = __ftrace_set_clr_event(tr, NULL, NULL, NULL, 1); if (WARN_ON_ONCE(ret)) { pr_warn("error enabling all events\n"); return; } event_test_stuff(); /* reset sysname */ ret = __ftrace_set_clr_event(tr, NULL, NULL, NULL, 0); if (WARN_ON_ONCE(ret)) { pr_warn("error disabling all events\n"); return; } pr_cont("OK\n"); } #ifdef CONFIG_FUNCTION_TRACER static DEFINE_PER_CPU(atomic_t, ftrace_test_event_disable); static struct trace_event_file event_trace_file __initdata; static void __init function_test_events_call(unsigned long ip, unsigned long parent_ip, struct ftrace_ops *op, struct pt_regs *pt_regs) { struct trace_buffer *buffer; struct ring_buffer_event *event; struct ftrace_entry *entry; unsigned long flags; long disabled; int cpu; int pc; pc = preempt_count(); preempt_disable_notrace(); cpu = raw_smp_processor_id(); disabled = atomic_inc_return(&per_cpu(ftrace_test_event_disable, cpu)); if (disabled != 1) goto out; local_save_flags(flags); event = trace_event_buffer_lock_reserve(&buffer, &event_trace_file, TRACE_FN, sizeof(*entry), flags, pc); if (!event) goto out; entry = ring_buffer_event_data(event); entry->ip = ip; entry->parent_ip = parent_ip; event_trigger_unlock_commit(&event_trace_file, buffer, event, entry, flags, pc); out: atomic_dec(&per_cpu(ftrace_test_event_disable, cpu)); preempt_enable_notrace(); } static struct ftrace_ops trace_ops __initdata = { .func = function_test_events_call, .flags = FTRACE_OPS_FL_RECURSION_SAFE, }; static __init void event_trace_self_test_with_function(void) { int ret; event_trace_file.tr = top_trace_array(); if (WARN_ON(!event_trace_file.tr)) return; ret = register_ftrace_function(&trace_ops); if (WARN_ON(ret < 0)) { pr_info("Failed to enable function tracer for event tests\n"); return; } pr_info("Running tests again, along with the function tracer\n"); event_trace_self_tests(); unregister_ftrace_function(&trace_ops); } #else static __init void event_trace_self_test_with_function(void) { } #endif static __init int event_trace_self_tests_init(void) { if (!tracing_selftest_disabled) { event_trace_self_tests(); event_trace_self_test_with_function(); } return 0; } late_initcall(event_trace_self_tests_init); #endif
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 /* SPDX-License-Identifier: GPL-2.0-or-later */ /* delayacct.h - per-task delay accounting * * Copyright (C) Shailabh Nagar, IBM Corp. 2006 */ #ifndef _LINUX_DELAYACCT_H #define _LINUX_DELAYACCT_H #include <uapi/linux/taskstats.h> /* * Per-task flags relevant to delay accounting * maintained privately to avoid exhausting similar flags in sched.h:PF_* * Used to set current->delays->flags */ #define DELAYACCT_PF_SWAPIN 0x00000001 /* I am doing a swapin */ #define DELAYACCT_PF_BLKIO 0x00000002 /* I am waiting on IO */ #ifdef CONFIG_TASK_DELAY_ACCT struct task_delay_info { raw_spinlock_t lock; unsigned int flags; /* Private per-task flags */ /* For each stat XXX, add following, aligned appropriately * * struct timespec XXX_start, XXX_end; * u64 XXX_delay; * u32 XXX_count; * * Atomicity of updates to XXX_delay, XXX_count protected by * single lock above (split into XXX_lock if contention is an issue). */ /* * XXX_count is incremented on every XXX operation, the delay * associated with the operation is added to XXX_delay. * XXX_delay contains the accumulated delay time in nanoseconds. */ u64 blkio_start; /* Shared by blkio, swapin */ u64 blkio_delay; /* wait for sync block io completion */ u64 swapin_delay; /* wait for swapin block io completion */ u32 blkio_count; /* total count of the number of sync block */ /* io operations performed */ u32 swapin_count; /* total count of the number of swapin block */ /* io operations performed */ u64 freepages_start; u64 freepages_delay; /* wait for memory reclaim */ u64 thrashing_start; u64 thrashing_delay; /* wait for thrashing page */ u32 freepages_count; /* total count of memory reclaim */ u32 thrashing_count; /* total count of thrash waits */ }; #endif #include <linux/sched.h> #include <linux/slab.h> #ifdef CONFIG_TASK_DELAY_ACCT extern int delayacct_on; /* Delay accounting turned on/off */ extern struct kmem_cache *delayacct_cache; extern void delayacct_init(void); extern void __delayacct_tsk_init(struct task_struct *); extern void __delayacct_tsk_exit(struct task_struct *); extern void __delayacct_blkio_start(void); extern void __delayacct_blkio_end(struct task_struct *); extern int __delayacct_add_tsk(struct taskstats *, struct task_struct *); extern __u64 __delayacct_blkio_ticks(struct task_struct *); extern void __delayacct_freepages_start(void); extern void __delayacct_freepages_end(void); extern void __delayacct_thrashing_start(void); extern void __delayacct_thrashing_end(void); static inline int delayacct_is_task_waiting_on_io(struct task_struct *p) { if (p->delays) return (p->delays->flags & DELAYACCT_PF_BLKIO); else return 0; } static inline void delayacct_set_flag(int flag) { if (current->delays) current->delays->flags |= flag; } static inline void delayacct_clear_flag(int flag) { if (current->delays) current->delays->flags &= ~flag; } static inline void delayacct_tsk_init(struct task_struct *tsk) { /* reinitialize in case parent's non-null pointer was dup'ed*/ tsk->delays = NULL; if (delayacct_on) __delayacct_tsk_init(tsk); } /* Free tsk->delays. Called from bad fork and __put_task_struct * where there's no risk of tsk->delays being accessed elsewhere */ static inline void delayacct_tsk_free(struct task_struct *tsk) { if (tsk->delays) kmem_cache_free(delayacct_cache, tsk->delays); tsk->delays = NULL; } static inline void delayacct_blkio_start(void) { delayacct_set_flag(DELAYACCT_PF_BLKIO); if (current->delays) __delayacct_blkio_start(); } static inline void delayacct_blkio_end(struct task_struct *p) { if (p->delays) __delayacct_blkio_end(p); delayacct_clear_flag(DELAYACCT_PF_BLKIO); } static inline int delayacct_add_tsk(struct taskstats *d, struct task_struct *tsk) { if (!delayacct_on || !tsk->delays) return 0; return __delayacct_add_tsk(d, tsk); } static inline __u64 delayacct_blkio_ticks(struct task_struct *tsk) { if (tsk->delays) return __delayacct_blkio_ticks(tsk); return 0; } static inline void delayacct_freepages_start(void) { if (current->delays) __delayacct_freepages_start(); } static inline void delayacct_freepages_end(void) { if (current->delays) __delayacct_freepages_end(); } static inline void delayacct_thrashing_start(void) { if (current->delays) __delayacct_thrashing_start(); } static inline void delayacct_thrashing_end(void) { if (current->delays) __delayacct_thrashing_end(); } #else static inline void delayacct_set_flag(int flag) {} static inline void delayacct_clear_flag(int flag) {} static inline void delayacct_init(void) {} static inline void delayacct_tsk_init(struct task_struct *tsk) {} static inline void delayacct_tsk_free(struct task_struct *tsk) {} static inline void delayacct_blkio_start(void) {} static inline void delayacct_blkio_end(struct task_struct *p) {} static inline int delayacct_add_tsk(struct taskstats *d, struct task_struct *tsk) { return 0; } static inline __u64 delayacct_blkio_ticks(struct task_struct *tsk) { return 0; } static inline int delayacct_is_task_waiting_on_io(struct task_struct *p) { return 0; } static inline void delayacct_freepages_start(void) {} static inline void delayacct_freepages_end(void) {} static inline void delayacct_thrashing_start(void) {} static inline void delayacct_thrashing_end(void) {} #endif /* CONFIG_TASK_DELAY_ACCT */ #endif
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 /* SPDX-License-Identifier: GPL-2.0 */ #ifndef _INET_COMMON_H #define _INET_COMMON_H #include <linux/indirect_call_wrapper.h> extern const struct proto_ops inet_stream_ops; extern const struct proto_ops inet_dgram_ops; /* * INET4 prototypes used by INET6 */ struct msghdr; struct sock; struct sockaddr; struct socket; int inet_release(struct socket *sock); int inet_stream_connect(struct socket *sock, struct sockaddr *uaddr, int addr_len, int flags); int __inet_stream_connect(struct socket *sock, struct sockaddr *uaddr, int addr_len, int flags, int is_sendmsg); int inet_dgram_connect(struct socket *sock, struct sockaddr *uaddr, int addr_len, int flags); int inet_accept(struct socket *sock, struct socket *newsock, int flags, bool kern); int inet_send_prepare(struct sock *sk); int inet_sendmsg(struct socket *sock, struct msghdr *msg, size_t size); ssize_t inet_sendpage(struct socket *sock, struct page *page, int offset, size_t size, int flags); int inet_recvmsg(struct socket *sock, struct msghdr *msg, size_t size, int flags); int inet_shutdown(struct socket *sock, int how); int inet_listen(struct socket *sock, int backlog); void inet_sock_destruct(struct sock *sk); int inet_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len); /* Don't allocate port at this moment, defer to connect. */ #define BIND_FORCE_ADDRESS_NO_PORT (1 << 0) /* Grab and release socket lock. */ #define BIND_WITH_LOCK (1 << 1) /* Called from BPF program. */ #define BIND_FROM_BPF (1 << 2) int __inet_bind(struct sock *sk, struct sockaddr *uaddr, int addr_len, u32 flags); int inet_getname(struct socket *sock, struct sockaddr *uaddr, int peer); int inet_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg); int inet_ctl_sock_create(struct sock **sk, unsigned short family, unsigned short type, unsigned char protocol, struct net *net); int inet_recv_error(struct sock *sk, struct msghdr *msg, int len, int *addr_len); struct sk_buff *inet_gro_receive(struct list_head *head, struct sk_buff *skb); int inet_gro_complete(struct sk_buff *skb, int nhoff); struct sk_buff *inet_gso_segment(struct sk_buff *skb, netdev_features_t features); static inline void inet_ctl_sock_destroy(struct sock *sk) { if (sk) sock_release(sk->sk_socket); } #define indirect_call_gro_receive(f2, f1, cb, head, skb) \ ({ \ unlikely(gro_recursion_inc_test(skb)) ? \ NAPI_GRO_CB(skb)->flush |= 1, NULL : \ INDIRECT_CALL_2(cb, f2, f1, head, skb); \ }) #endif
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 /* SPDX-License-Identifier: GPL-2.0 */ #ifndef _NET_NEIGHBOUR_H #define _NET_NEIGHBOUR_H #include <linux/neighbour.h> /* * Generic neighbour manipulation * * Authors: * Pedro Roque <roque@di.fc.ul.pt> * Alexey Kuznetsov <kuznet@ms2.inr.ac.ru> * * Changes: * * Harald Welte: <laforge@gnumonks.org> * - Add neighbour cache statistics like rtstat */ #include <linux/atomic.h> #include <linux/refcount.h> #include <linux/netdevice.h> #include <linux/skbuff.h> #include <linux/rcupdate.h> #include <linux/seq_file.h> #include <linux/bitmap.h> #include <linux/err.h> #include <linux/sysctl.h> #include <linux/workqueue.h> #include <net/rtnetlink.h> /* * NUD stands for "neighbor unreachability detection" */ #define NUD_IN_TIMER (NUD_INCOMPLETE|NUD_REACHABLE|NUD_DELAY|NUD_PROBE) #define NUD_VALID (NUD_PERMANENT|NUD_NOARP|NUD_REACHABLE|NUD_PROBE|NUD_STALE|NUD_DELAY) #define NUD_CONNECTED (NUD_PERMANENT|NUD_NOARP|NUD_REACHABLE) struct neighbour; enum { NEIGH_VAR_MCAST_PROBES, NEIGH_VAR_UCAST_PROBES, NEIGH_VAR_APP_PROBES, NEIGH_VAR_MCAST_REPROBES, NEIGH_VAR_RETRANS_TIME, NEIGH_VAR_BASE_REACHABLE_TIME, NEIGH_VAR_DELAY_PROBE_TIME, NEIGH_VAR_GC_STALETIME, NEIGH_VAR_QUEUE_LEN_BYTES, NEIGH_VAR_PROXY_QLEN, NEIGH_VAR_ANYCAST_DELAY, NEIGH_VAR_PROXY_DELAY, NEIGH_VAR_LOCKTIME, #define NEIGH_VAR_DATA_MAX (NEIGH_VAR_LOCKTIME + 1) /* Following are used as a second way to access one of the above */ NEIGH_VAR_QUEUE_LEN, /* same data as NEIGH_VAR_QUEUE_LEN_BYTES */ NEIGH_VAR_RETRANS_TIME_MS, /* same data as NEIGH_VAR_RETRANS_TIME */ NEIGH_VAR_BASE_REACHABLE_TIME_MS, /* same data as NEIGH_VAR_BASE_REACHABLE_TIME */ /* Following are used by "default" only */ NEIGH_VAR_GC_INTERVAL, NEIGH_VAR_GC_THRESH1, NEIGH_VAR_GC_THRESH2, NEIGH_VAR_GC_THRESH3, NEIGH_VAR_MAX }; struct neigh_parms { possible_net_t net; struct net_device *dev; struct list_head list; int (*neigh_setup)(struct neighbour *); struct neigh_table *tbl; void *sysctl_table; int dead; refcount_t refcnt; struct rcu_head rcu_head; int reachable_time; int data[NEIGH_VAR_DATA_MAX]; DECLARE_BITMAP(data_state, NEIGH_VAR_DATA_MAX); }; static inline void neigh_var_set(struct neigh_parms *p, int index, int val) { set_bit(index, p->data_state); p->data[index] = val; } #define NEIGH_VAR(p, attr) ((p)->data[NEIGH_VAR_ ## attr]) /* In ndo_neigh_setup, NEIGH_VAR_INIT should be used. * In other cases, NEIGH_VAR_SET should be used. */ #define NEIGH_VAR_INIT(p, attr, val) (NEIGH_VAR(p, attr) = val) #define NEIGH_VAR_SET(p, attr, val) neigh_var_set(p, NEIGH_VAR_ ## attr, val) static inline void neigh_parms_data_state_setall(struct neigh_parms *p) { bitmap_fill(p->data_state, NEIGH_VAR_DATA_MAX); } static inline void neigh_parms_data_state_cleanall(struct neigh_parms *p) { bitmap_zero(p->data_state, NEIGH_VAR_DATA_MAX); } struct neigh_statistics { unsigned long allocs; /* number of allocated neighs */ unsigned long destroys; /* number of destroyed neighs */ unsigned long hash_grows; /* number of hash resizes */ unsigned long res_failed; /* number of failed resolutions */ unsigned long lookups; /* number of lookups */ unsigned long hits; /* number of hits (among lookups) */ unsigned long rcv_probes_mcast; /* number of received mcast ipv6 */ unsigned long rcv_probes_ucast; /* number of received ucast ipv6 */ unsigned long periodic_gc_runs; /* number of periodic GC runs */ unsigned long forced_gc_runs; /* number of forced GC runs */ unsigned long unres_discards; /* number of unresolved drops */ unsigned long table_fulls; /* times even gc couldn't help */ }; #define NEIGH_CACHE_STAT_INC(tbl, field) this_cpu_inc((tbl)->stats->field) struct neighbour { struct neighbour __rcu *next; struct neigh_table *tbl; struct neigh_parms *parms; unsigned long confirmed; unsigned long updated; rwlock_t lock; refcount_t refcnt; unsigned int arp_queue_len_bytes; struct sk_buff_head arp_queue; struct timer_list timer; unsigned long used; atomic_t probes; __u8 flags; __u8 nud_state; __u8 type; __u8 dead; u8 protocol; seqlock_t ha_lock; unsigned char ha[ALIGN(MAX_ADDR_LEN, sizeof(unsigned long))] __aligned(8); struct hh_cache hh; int (*output)(struct neighbour *, struct sk_buff *); const struct neigh_ops *ops; struct list_head gc_list; struct rcu_head rcu; struct net_device *dev; u8 primary_key[0]; } __randomize_layout; struct neigh_ops { int family; void (*solicit)(struct neighbour *, struct sk_buff *); void (*error_report)(struct neighbour *, struct sk_buff *); int (*output)(struct neighbour *, struct sk_buff *); int (*connected_output)(struct neighbour *, struct sk_buff *); }; struct pneigh_entry { struct pneigh_entry *next; possible_net_t net; struct net_device *dev; u8 flags; u8 protocol; u8 key[]; }; /* * neighbour table manipulation */ #define NEIGH_NUM_HASH_RND 4 struct neigh_hash_table { struct neighbour __rcu **hash_buckets; unsigned int hash_shift; __u32 hash_rnd[NEIGH_NUM_HASH_RND]; struct rcu_head rcu; }; struct neigh_table { int family; unsigned int entry_size; unsigned int key_len; __be16 protocol; __u32 (*hash)(const void *pkey, const struct net_device *dev, __u32 *hash_rnd); bool (*key_eq)(const struct neighbour *, const void *pkey); int (*constructor)(struct neighbour *); int (*pconstructor)(struct pneigh_entry *); void (*pdestructor)(struct pneigh_entry *); void (*proxy_redo)(struct sk_buff *skb); int (*is_multicast)(const void *pkey); bool (*allow_add)(const struct net_device *dev, struct netlink_ext_ack *extack); char *id; struct neigh_parms parms; struct list_head parms_list; int gc_interval; int gc_thresh1; int gc_thresh2; int gc_thresh3; unsigned long last_flush; struct delayed_work gc_work; struct timer_list proxy_timer; struct sk_buff_head proxy_queue; atomic_t entries; atomic_t gc_entries; struct list_head gc_list; rwlock_t lock; unsigned long last_rand; struct neigh_statistics __percpu *stats; struct neigh_hash_table __rcu *nht; struct pneigh_entry **phash_buckets; }; enum { NEIGH_ARP_TABLE = 0, NEIGH_ND_TABLE = 1, NEIGH_DN_TABLE = 2, NEIGH_NR_TABLES, NEIGH_LINK_TABLE = NEIGH_NR_TABLES /* Pseudo table for neigh_xmit */ }; static inline int neigh_parms_family(struct neigh_parms *p) { return p->tbl->family; } #define NEIGH_PRIV_ALIGN sizeof(long long) #define NEIGH_ENTRY_SIZE(size) ALIGN((size), NEIGH_PRIV_ALIGN) static inline void *neighbour_priv(const struct neighbour *n) { return (char *)n + n->tbl->entry_size; } /* flags for neigh_update() */ #define NEIGH_UPDATE_F_OVERRIDE 0x00000001 #define NEIGH_UPDATE_F_WEAK_OVERRIDE 0x00000002 #define NEIGH_UPDATE_F_OVERRIDE_ISROUTER 0x00000004 #define NEIGH_UPDATE_F_EXT_LEARNED 0x20000000 #define NEIGH_UPDATE_F_ISROUTER 0x40000000 #define NEIGH_UPDATE_F_ADMIN 0x80000000 extern const struct nla_policy nda_policy[]; static inline bool neigh_key_eq16(const struct neighbour *n, const void *pkey) { return *(const u16 *)n->primary_key == *(const u16 *)pkey; } static inline bool neigh_key_eq32(const struct neighbour *n, const void *pkey) { return *(const u32 *)n->primary_key == *(const u32 *)pkey; } static inline bool neigh_key_eq128(const struct neighbour *n, const void *pkey) { const u32 *n32 = (const u32 *)n->primary_key; const u32 *p32 = pkey; return ((n32[0] ^ p32[0]) | (n32[1] ^ p32[1]) | (n32[2] ^ p32[2]) | (n32[3] ^ p32[3])) == 0; } static inline struct neighbour *___neigh_lookup_noref( struct neigh_table *tbl, bool (*key_eq)(const struct neighbour *n, const void *pkey), __u32 (*hash)(const void *pkey, const struct net_device *dev, __u32 *hash_rnd), const void *pkey, struct net_device *dev) { struct neigh_hash_table *nht = rcu_dereference_bh(tbl->nht); struct neighbour *n; u32 hash_val; hash_val = hash(pkey, dev, nht->hash_rnd) >> (32 - nht->hash_shift); for (n = rcu_dereference_bh(nht->hash_buckets[hash_val]); n != NULL; n = rcu_dereference_bh(n->next)) { if (n->dev == dev && key_eq(n, pkey)) return n; } return NULL; } static inline struct neighbour *__neigh_lookup_noref(struct neigh_table *tbl, const void *pkey, struct net_device *dev) { return ___neigh_lookup_noref(tbl, tbl->key_eq, tbl->hash, pkey, dev); } void neigh_table_init(int index, struct neigh_table *tbl); int neigh_table_clear(int index, struct neigh_table *tbl); struct neighbour *neigh_lookup(struct neigh_table *tbl, const void *pkey, struct net_device *dev); struct neighbour *neigh_lookup_nodev(struct neigh_table *tbl, struct net *net, const void *pkey); struct neighbour *__neigh_create(struct neigh_table *tbl, const void *pkey, struct net_device *dev, bool want_ref); static inline struct neighbour *neigh_create(struct neigh_table *tbl, const void *pkey, struct net_device *dev) { return __neigh_create(tbl, pkey, dev, true); } void neigh_destroy(struct neighbour *neigh); int __neigh_event_send(struct neighbour *neigh, struct sk_buff *skb); int neigh_update(struct neighbour *neigh, const u8 *lladdr, u8 new, u32 flags, u32 nlmsg_pid); void __neigh_set_probe_once(struct neighbour *neigh); bool neigh_remove_one(struct neighbour *ndel, struct neigh_table *tbl); void neigh_changeaddr(struct neigh_table *tbl, struct net_device *dev); int neigh_ifdown(struct neigh_table *tbl, struct net_device *dev); int neigh_carrier_down(struct neigh_table *tbl, struct net_device *dev); int neigh_resolve_output(struct neighbour *neigh, struct sk_buff *skb); int neigh_connected_output(struct neighbour *neigh, struct sk_buff *skb); int neigh_direct_output(struct neighbour *neigh, struct sk_buff *skb); struct neighbour *neigh_event_ns(struct neigh_table *tbl, u8 *lladdr, void *saddr, struct net_device *dev); struct neigh_parms *neigh_parms_alloc(struct net_device *dev, struct neigh_table *tbl); void neigh_parms_release(struct neigh_table *tbl, struct neigh_parms *parms); static inline struct net *neigh_parms_net(const struct neigh_parms *parms) { return read_pnet(&parms->net); } unsigned long neigh_rand_reach_time(unsigned long base); void pneigh_enqueue(struct neigh_table *tbl, struct neigh_parms *p, struct sk_buff *skb); struct pneigh_entry *pneigh_lookup(struct neigh_table *tbl, struct net *net, const void *key, struct net_device *dev, int creat); struct pneigh_entry *__pneigh_lookup(struct neigh_table *tbl, struct net *net, const void *key, struct net_device *dev); int pneigh_delete(struct neigh_table *tbl, struct net *net, const void *key, struct net_device *dev); static inline struct net *pneigh_net(const struct pneigh_entry *pneigh) { return read_pnet(&pneigh->net); } void neigh_app_ns(struct neighbour *n); void neigh_for_each(struct neigh_table *tbl, void (*cb)(struct neighbour *, void *), void *cookie); void __neigh_for_each_release(struct neigh_table *tbl, int (*cb)(struct neighbour *)); int neigh_xmit(int fam, struct net_device *, const void *, struct sk_buff *); void pneigh_for_each(struct neigh_table *tbl, void (*cb)(struct pneigh_entry *)); struct neigh_seq_state { struct seq_net_private p; struct neigh_table *tbl; struct neigh_hash_table *nht; void *(*neigh_sub_iter)(struct neigh_seq_state *state, struct neighbour *n, loff_t *pos); unsigned int bucket; unsigned int flags; #define NEIGH_SEQ_NEIGH_ONLY 0x00000001 #define NEIGH_SEQ_IS_PNEIGH 0x00000002 #define NEIGH_SEQ_SKIP_NOARP 0x00000004 }; void *neigh_seq_start(struct seq_file *, loff_t *, struct neigh_table *, unsigned int); void *neigh_seq_next(struct seq_file *, void *, loff_t *); void neigh_seq_stop(struct seq_file *, void *); int neigh_proc_dointvec(struct ctl_table *ctl, int write, void *buffer, size_t *lenp, loff_t *ppos); int neigh_proc_dointvec_jiffies(struct ctl_table *ctl, int write, void *buffer, size_t *lenp, loff_t *ppos); int neigh_proc_dointvec_ms_jiffies(struct ctl_table *ctl, int write, void *buffer, size_t *lenp, loff_t *ppos); int neigh_sysctl_register(struct net_device *dev, struct neigh_parms *p, proc_handler *proc_handler); void neigh_sysctl_unregister(struct neigh_parms *p); static inline void __neigh_parms_put(struct neigh_parms *parms) { refcount_dec(&parms->refcnt); } static inline struct neigh_parms *neigh_parms_clone(struct neigh_parms *parms) { refcount_inc(&parms->refcnt); return parms; } /* * Neighbour references */ static inline void neigh_release(struct neighbour *neigh) { if (refcount_dec_and_test(&neigh->refcnt)) neigh_destroy(neigh); } static inline struct neighbour * neigh_clone(struct neighbour *neigh) { if (neigh) refcount_inc(&neigh->refcnt); return neigh; } #define neigh_hold(n) refcount_inc(&(n)->refcnt) static inline int neigh_event_send(struct neighbour *neigh, struct sk_buff *skb) { unsigned long now = jiffies; if (READ_ONCE(neigh->used) != now) WRITE_ONCE(neigh->used, now); if (!(neigh->nud_state&(NUD_CONNECTED|NUD_DELAY|NUD_PROBE))) return __neigh_event_send(neigh, skb); return 0; } #if IS_ENABLED(CONFIG_BRIDGE_NETFILTER) static inline int neigh_hh_bridge(struct hh_cache *hh, struct sk_buff *skb) { unsigned int seq, hh_alen; do { seq = read_seqbegin(&hh->hh_lock); hh_alen = HH_DATA_ALIGN(ETH_HLEN); memcpy(skb->data - hh_alen, hh->hh_data, ETH_ALEN + hh_alen - ETH_HLEN); } while (read_seqretry(&hh->hh_lock, seq)); return 0; } #endif static inline int neigh_hh_output(const struct hh_cache *hh, struct sk_buff *skb) { unsigned int hh_alen = 0; unsigned int seq; unsigned int hh_len; do { seq = read_seqbegin(&hh->hh_lock); hh_len = READ_ONCE(hh->hh_len); if (likely(hh_len <= HH_DATA_MOD)) { hh_alen = HH_DATA_MOD; /* skb_push() would proceed silently if we have room for * the unaligned size but not for the aligned size: * check headroom explicitly. */ if (likely(skb_headroom(skb) >= HH_DATA_MOD)) { /* this is inlined by gcc */ memcpy(skb->data - HH_DATA_MOD, hh->hh_data, HH_DATA_MOD); } } else { hh_alen = HH_DATA_ALIGN(hh_len); if (likely(skb_headroom(skb) >= hh_alen)) { memcpy(skb->data - hh_alen, hh->hh_data, hh_alen); } } } while (read_seqretry(&hh->hh_lock, seq)); if (WARN_ON_ONCE(skb_headroom(skb) < hh_alen)) { kfree_skb(skb); return NET_XMIT_DROP; } __skb_push(skb, hh_len); return dev_queue_xmit(skb); } static inline int neigh_output(struct neighbour *n, struct sk_buff *skb, bool skip_cache) { const struct hh_cache *hh = &n->hh; if ((n->nud_state & NUD_CONNECTED) && hh->hh_len && !skip_cache) return neigh_hh_output(hh, skb); else return n->output(n, skb); } static inline struct neighbour * __neigh_lookup(struct neigh_table *tbl, const void *pkey, struct net_device *dev, int creat) { struct neighbour *n = neigh_lookup(tbl, pkey, dev); if (n || !creat) return n; n = neigh_create(tbl, pkey, dev); return IS_ERR(n) ? NULL : n; } static inline struct neighbour * __neigh_lookup_errno(struct neigh_table *tbl, const void *pkey, struct net_device *dev) { struct neighbour *n = neigh_lookup(tbl, pkey, dev); if (n) return n; return neigh_create(tbl, pkey, dev); } struct neighbour_cb { unsigned long sched_next; unsigned int flags; }; #define LOCALLY_ENQUEUED 0x1 #define NEIGH_CB(skb) ((struct neighbour_cb *)(skb)->cb) static inline void neigh_ha_snapshot(char *dst, const struct neighbour *n, const struct net_device *dev) { unsigned int seq; do { seq = read_seqbegin(&n->ha_lock); memcpy(dst, n->ha, dev->addr_len); } while (read_seqretry(&n->ha_lock, seq)); } static inline void neigh_update_is_router(struct neighbour *neigh, u32 flags, int *notify) { u8 ndm_flags = 0; ndm_flags |= (flags & NEIGH_UPDATE_F_ISROUTER) ? NTF_ROUTER : 0; if ((neigh->flags ^ ndm_flags) & NTF_ROUTER) { if (ndm_flags & NTF_ROUTER) neigh->flags |= NTF_ROUTER; else neigh->flags &= ~NTF_ROUTER; *notify = 1; } } #endif
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 /* SPDX-License-Identifier: GPL-2.0-only */ /* * Copyright (C) 2007, 2008, 2009 Siemens AG * * Written by: * Dmitry Eremin-Solenikov <dbaryshkov@gmail.com> */ #ifndef __NET_CFG802154_H #define __NET_CFG802154_H #include <linux/ieee802154.h> #include <linux/netdevice.h> #include <linux/mutex.h> #include <linux/bug.h> #include <net/nl802154.h> struct wpan_phy; struct wpan_phy_cca; #ifdef CONFIG_IEEE802154_NL802154_EXPERIMENTAL struct ieee802154_llsec_device_key; struct ieee802154_llsec_seclevel; struct ieee802154_llsec_params; struct ieee802154_llsec_device; struct ieee802154_llsec_table; struct ieee802154_llsec_key_id; struct ieee802154_llsec_key; #endif /* CONFIG_IEEE802154_NL802154_EXPERIMENTAL */ struct cfg802154_ops { struct net_device * (*add_virtual_intf_deprecated)(struct wpan_phy *wpan_phy, const char *name, unsigned char name_assign_type, int type); void (*del_virtual_intf_deprecated)(struct wpan_phy *wpan_phy, struct net_device *dev); int (*suspend)(struct wpan_phy *wpan_phy); int (*resume)(struct wpan_phy *wpan_phy); int (*add_virtual_intf)(struct wpan_phy *wpan_phy, const char *name, unsigned char name_assign_type, enum nl802154_iftype type, __le64 extended_addr); int (*del_virtual_intf)(struct wpan_phy *wpan_phy, struct wpan_dev *wpan_dev); int (*set_channel)(struct wpan_phy *wpan_phy, u8 page, u8 channel); int (*set_cca_mode)(struct wpan_phy *wpan_phy, const struct wpan_phy_cca *cca); int (*set_cca_ed_level)(struct wpan_phy *wpan_phy, s32 ed_level); int (*set_tx_power)(struct wpan_phy *wpan_phy, s32 power); int (*set_pan_id)(struct wpan_phy *wpan_phy, struct wpan_dev *wpan_dev, __le16 pan_id); int (*set_short_addr)(struct wpan_phy *wpan_phy, struct wpan_dev *wpan_dev, __le16 short_addr); int (*set_backoff_exponent)(struct wpan_phy *wpan_phy, struct wpan_dev *wpan_dev, u8 min_be, u8 max_be); int (*set_max_csma_backoffs)(struct wpan_phy *wpan_phy, struct wpan_dev *wpan_dev, u8 max_csma_backoffs); int (*set_max_frame_retries)(struct wpan_phy *wpan_phy, struct wpan_dev *wpan_dev, s8 max_frame_retries); int (*set_lbt_mode)(struct wpan_phy *wpan_phy, struct wpan_dev *wpan_dev, bool mode); int (*set_ackreq_default)(struct wpan_phy *wpan_phy, struct wpan_dev *wpan_dev, bool ackreq); #ifdef CONFIG_IEEE802154_NL802154_EXPERIMENTAL void (*get_llsec_table)(struct wpan_phy *wpan_phy, struct wpan_dev *wpan_dev, struct ieee802154_llsec_table **table); void (*lock_llsec_table)(struct wpan_phy *wpan_phy, struct wpan_dev *wpan_dev); void (*unlock_llsec_table)(struct wpan_phy *wpan_phy, struct wpan_dev *wpan_dev); /* TODO remove locking/get table callbacks, this is part of the * nl802154 interface and should be accessible from ieee802154 layer. */ int (*get_llsec_params)(struct wpan_phy *wpan_phy, struct wpan_dev *wpan_dev, struct ieee802154_llsec_params *params); int (*set_llsec_params)(struct wpan_phy *wpan_phy, struct wpan_dev *wpan_dev, const struct ieee802154_llsec_params *params, int changed); int (*add_llsec_key)(struct wpan_phy *wpan_phy, struct wpan_dev *wpan_dev, const struct ieee802154_llsec_key_id *id, const struct ieee802154_llsec_key *key); int (*del_llsec_key)(struct wpan_phy *wpan_phy, struct wpan_dev *wpan_dev, const struct ieee802154_llsec_key_id *id); int (*add_seclevel)(struct wpan_phy *wpan_phy, struct wpan_dev *wpan_dev, const struct ieee802154_llsec_seclevel *sl); int (*del_seclevel)(struct wpan_phy *wpan_phy, struct wpan_dev *wpan_dev, const struct ieee802154_llsec_seclevel *sl); int (*add_device)(struct wpan_phy *wpan_phy, struct wpan_dev *wpan_dev, const struct ieee802154_llsec_device *dev); int (*del_device)(struct wpan_phy *wpan_phy, struct wpan_dev *wpan_dev, __le64 extended_addr); int (*add_devkey)(struct wpan_phy *wpan_phy, struct wpan_dev *wpan_dev, __le64 extended_addr, const struct ieee802154_llsec_device_key *key); int (*del_devkey)(struct wpan_phy *wpan_phy, struct wpan_dev *wpan_dev, __le64 extended_addr, const struct ieee802154_llsec_device_key *key); #endif /* CONFIG_IEEE802154_NL802154_EXPERIMENTAL */ }; static inline bool wpan_phy_supported_bool(bool b, enum nl802154_supported_bool_states st) { switch (st) { case NL802154_SUPPORTED_BOOL_TRUE: return b; case NL802154_SUPPORTED_BOOL_FALSE: return !b; case NL802154_SUPPORTED_BOOL_BOTH: return true; default: WARN_ON(1); } return false; } struct wpan_phy_supported { u32 channels[IEEE802154_MAX_PAGE + 1], cca_modes, cca_opts, iftypes; enum nl802154_supported_bool_states lbt; u8 min_minbe, max_minbe, min_maxbe, max_maxbe, min_csma_backoffs, max_csma_backoffs; s8 min_frame_retries, max_frame_retries; size_t tx_powers_size, cca_ed_levels_size; const s32 *tx_powers, *cca_ed_levels; }; struct wpan_phy_cca { enum nl802154_cca_modes mode; enum nl802154_cca_opts opt; }; static inline bool wpan_phy_cca_cmp(const struct wpan_phy_cca *a, const struct wpan_phy_cca *b) { if (a->mode != b->mode) return false; if (a->mode == NL802154_CCA_ENERGY_CARRIER) return a->opt == b->opt; return true; } /** * @WPAN_PHY_FLAG_TRANSMIT_POWER: Indicates that transceiver will support * transmit power setting. * @WPAN_PHY_FLAG_CCA_ED_LEVEL: Indicates that transceiver will support cca ed * level setting. * @WPAN_PHY_FLAG_CCA_MODE: Indicates that transceiver will support cca mode * setting. */ enum wpan_phy_flags { WPAN_PHY_FLAG_TXPOWER = BIT(1), WPAN_PHY_FLAG_CCA_ED_LEVEL = BIT(2), WPAN_PHY_FLAG_CCA_MODE = BIT(3), }; struct wpan_phy { /* If multiple wpan_phys are registered and you're handed e.g. * a regular netdev with assigned ieee802154_ptr, you won't * know whether it points to a wpan_phy your driver has registered * or not. Assign this to something global to your driver to * help determine whether you own this wpan_phy or not. */ const void *privid; u32 flags; /* * This is a PIB according to 802.15.4-2011. * We do not provide timing-related variables, as they * aren't used outside of driver */ u8 current_channel; u8 current_page; struct wpan_phy_supported supported; /* current transmit_power in mBm */ s32 transmit_power; struct wpan_phy_cca cca; __le64 perm_extended_addr; /* current cca ed threshold in mBm */ s32 cca_ed_level; /* PHY depended MAC PIB values */ /* 802.15.4 acronym: Tdsym in usec */ u8 symbol_duration; /* lifs and sifs periods timing */ u16 lifs_period; u16 sifs_period; struct device dev; /* the network namespace this phy lives in currently */ possible_net_t _net; char priv[] __aligned(NETDEV_ALIGN); }; static inline struct net *wpan_phy_net(struct wpan_phy *wpan_phy) { return read_pnet(&wpan_phy->_net); } static inline void wpan_phy_net_set(struct wpan_phy *wpan_phy, struct net *net) { write_pnet(&wpan_phy->_net, net); } struct ieee802154_addr { u8 mode; __le16 pan_id; union { __le16 short_addr; __le64 extended_addr; }; }; struct ieee802154_llsec_key_id { u8 mode; u8 id; union { struct ieee802154_addr device_addr; __le32 short_source; __le64 extended_source; }; }; #define IEEE802154_LLSEC_KEY_SIZE 16 struct ieee802154_llsec_key { u8 frame_types; u32 cmd_frame_ids; /* TODO replace with NL802154_KEY_SIZE */ u8 key[IEEE802154_LLSEC_KEY_SIZE]; }; struct ieee802154_llsec_key_entry { struct list_head list; struct ieee802154_llsec_key_id id; struct ieee802154_llsec_key *key; }; struct ieee802154_llsec_params { bool enabled; __be32 frame_counter; u8 out_level; struct ieee802154_llsec_key_id out_key; __le64 default_key_source; __le16 pan_id; __le64 hwaddr; __le64 coord_hwaddr; __le16 coord_shortaddr; }; struct ieee802154_llsec_table { struct list_head keys; struct list_head devices; struct list_head security_levels; }; struct ieee802154_llsec_seclevel { struct list_head list; u8 frame_type; u8 cmd_frame_id; bool device_override; u32 sec_levels; }; struct ieee802154_llsec_device { struct list_head list; __le16 pan_id; __le16 short_addr; __le64 hwaddr; u32 frame_counter; bool seclevel_exempt; u8 key_mode; struct list_head keys; }; struct ieee802154_llsec_device_key { struct list_head list; struct ieee802154_llsec_key_id key_id; u32 frame_counter; }; struct wpan_dev_header_ops { /* TODO create callback currently assumes ieee802154_mac_cb inside * skb->cb. This should be changed to give these information as * parameter. */ int (*create)(struct sk_buff *skb, struct net_device *dev, const struct ieee802154_addr *daddr, const struct ieee802154_addr *saddr, unsigned int len); }; struct wpan_dev { struct wpan_phy *wpan_phy; int iftype; /* the remainder of this struct should be private to cfg802154 */ struct list_head list; struct net_device *netdev; const struct wpan_dev_header_ops *header_ops; /* lowpan interface, set when the wpan_dev belongs to one lowpan_dev */ struct net_device *lowpan_dev; u32 identifier; /* MAC PIB */ __le16 pan_id; __le16 short_addr; __le64 extended_addr; /* MAC BSN field */ atomic_t bsn; /* MAC DSN field */ atomic_t dsn; u8 min_be; u8 max_be; u8 csma_retries; s8 frame_retries; bool lbt; bool promiscuous_mode; /* fallback for acknowledgment bit setting */ bool ackreq; }; #define to_phy(_dev) container_of(_dev, struct wpan_phy, dev) static inline int wpan_dev_hard_header(struct sk_buff *skb, struct net_device *dev, const struct ieee802154_addr *daddr, const struct ieee802154_addr *saddr, unsigned int len) { struct wpan_dev *wpan_dev = dev->ieee802154_ptr; return wpan_dev->header_ops->create(skb, dev, daddr, saddr, len); } struct wpan_phy * wpan_phy_new(const struct cfg802154_ops *ops, size_t priv_size); static inline void wpan_phy_set_dev(struct wpan_phy *phy, struct device *dev) { phy->dev.parent = dev; } int wpan_phy_register(struct wpan_phy *phy); void wpan_phy_unregister(struct wpan_phy *phy); void wpan_phy_free(struct wpan_phy *phy); /* Same semantics as for class_for_each_device */ int wpan_phy_for_each(int (*fn)(struct wpan_phy *phy, void *data), void *data); static inline void *wpan_phy_priv(struct wpan_phy *phy) { BUG_ON(!phy); return &phy->priv; } struct wpan_phy *wpan_phy_find(const char *str); static inline void wpan_phy_put(struct wpan_phy *phy) { put_device(&phy->dev); } static inline const char *wpan_phy_name(struct wpan_phy *phy) { return dev_name(&phy->dev); } #endif /* __NET_CFG802154_H */
3 2 3 3 3 1 1 1 1 3 3 3 3 3 2 2 2 3 2 3 1 1 1 1 1 1 3 3 2 1 1 1 1 2 2 2 2 2 2 3 5 1 1 1 3 3 2 3 4 3 4 2 5 5 5 5 5 5 5 2 2 2 2 5 5 1 5 5 3 2 3 2 3 2 3 3 3 3 2 2 3 3 3 3 3 2 2 2 2 2 1 2 2 2 2 2 2 1 2 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 // SPDX-License-Identifier: GPL-2.0-only /* * Copyright (C) 2004, OGAWA Hirofumi */ #include <linux/blkdev.h> #include <linux/sched/signal.h> #include "fat.h" struct fatent_operations { void (*ent_blocknr)(struct super_block *, int, int *, sector_t *); void (*ent_set_ptr)(struct fat_entry *, int); int (*ent_bread)(struct super_block *, struct fat_entry *, int, sector_t); int (*ent_get)(struct fat_entry *); void (*ent_put)(struct fat_entry *, int); int (*ent_next)(struct fat_entry *); }; static DEFINE_SPINLOCK(fat12_entry_lock); static void fat12_ent_blocknr(struct super_block *sb, int entry, int *offset, sector_t *blocknr) { struct msdos_sb_info *sbi = MSDOS_SB(sb); int bytes = entry + (entry >> 1); WARN_ON(!fat_valid_entry(sbi, entry)); *offset = bytes & (sb->s_blocksize - 1); *blocknr = sbi->fat_start + (bytes >> sb->s_blocksize_bits); } static void fat_ent_blocknr(struct super_block *sb, int entry, int *offset, sector_t *blocknr) { struct msdos_sb_info *sbi = MSDOS_SB(sb); int bytes = (entry << sbi->fatent_shift); WARN_ON(!fat_valid_entry(sbi, entry)); *offset = bytes & (sb->s_blocksize - 1); *blocknr = sbi->fat_start + (bytes >> sb->s_blocksize_bits); } static void fat12_ent_set_ptr(struct fat_entry *fatent, int offset) { struct buffer_head **bhs = fatent->bhs; if (fatent->nr_bhs == 1) { WARN_ON(offset >= (bhs[0]->b_size - 1)); fatent->u.ent12_p[0] = bhs[0]->b_data + offset; fatent->u.ent12_p[1] = bhs[0]->b_data + (offset + 1); } else { WARN_ON(offset != (bhs[0]->b_size - 1)); fatent->u.ent12_p[0] = bhs[0]->b_data + offset; fatent->u.ent12_p[1] = bhs[1]->b_data; } } static void fat16_ent_set_ptr(struct fat_entry *fatent, int offset) { WARN_ON(offset & (2 - 1)); fatent->u.ent16_p = (__le16 *)(fatent->bhs[0]->b_data + offset); } static void fat32_ent_set_ptr(struct fat_entry *fatent, int offset) { WARN_ON(offset & (4 - 1)); fatent->u.ent32_p = (__le32 *)(fatent->bhs[0]->b_data + offset); } static int fat12_ent_bread(struct super_block *sb, struct fat_entry *fatent, int offset, sector_t blocknr) { struct buffer_head **bhs = fatent->bhs; WARN_ON(blocknr < MSDOS_SB(sb)->fat_start); fatent->fat_inode = MSDOS_SB(sb)->fat_inode; bhs[0] = sb_bread(sb, blocknr); if (!bhs[0]) goto err; if ((offset + 1) < sb->s_blocksize) fatent->nr_bhs = 1; else { /* This entry is block boundary, it needs the next block */ blocknr++; bhs[1] = sb_bread(sb, blocknr); if (!bhs[1]) goto err_brelse; fatent->nr_bhs = 2; } fat12_ent_set_ptr(fatent, offset); return 0; err_brelse: brelse(bhs[0]); err: fat_msg(sb, KERN_ERR, "FAT read failed (blocknr %llu)", (llu)blocknr); return -EIO; } static int fat_ent_bread(struct super_block *sb, struct fat_entry *fatent, int offset, sector_t blocknr) { const struct fatent_operations *ops = MSDOS_SB(sb)->fatent_ops; WARN_ON(blocknr < MSDOS_SB(sb)->fat_start); fatent->fat_inode = MSDOS_SB(sb)->fat_inode; fatent->bhs[0] = sb_bread(sb, blocknr); if (!fatent->bhs[0]) { fat_msg(sb, KERN_ERR, "FAT read failed (blocknr %llu)", (llu)blocknr); return -EIO; } fatent->nr_bhs = 1; ops->ent_set_ptr(fatent, offset); return 0; } static int fat12_ent_get(struct fat_entry *fatent) { u8 **ent12_p = fatent->u.ent12_p; int next; spin_lock(&fat12_entry_lock); if (fatent->entry & 1) next = (*ent12_p[0] >> 4) | (*ent12_p[1] << 4); else next = (*ent12_p[1] << 8) | *ent12_p[0]; spin_unlock(&fat12_entry_lock); next &= 0x0fff; if (next >= BAD_FAT12) next = FAT_ENT_EOF; return next; } static int fat16_ent_get(struct fat_entry *fatent) { int next = le16_to_cpu(*fatent->u.ent16_p); WARN_ON((unsigned long)fatent->u.ent16_p & (2 - 1)); if (next >= BAD_FAT16) next = FAT_ENT_EOF; return next; } static int fat32_ent_get(struct fat_entry *fatent) { int next = le32_to_cpu(*fatent->u.ent32_p) & 0x0fffffff; WARN_ON((unsigned long)fatent->u.ent32_p & (4 - 1)); if (next >= BAD_FAT32) next = FAT_ENT_EOF; return next; } static void fat12_ent_put(struct fat_entry *fatent, int new) { u8 **ent12_p = fatent->u.ent12_p; if (new == FAT_ENT_EOF) new = EOF_FAT12; spin_lock(&fat12_entry_lock); if (fatent->entry & 1) { *ent12_p[0] = (new << 4) | (*ent12_p[0] & 0x0f); *ent12_p[1] = new >> 4; } else { *ent12_p[0] = new & 0xff; *ent12_p[1] = (*ent12_p[1] & 0xf0) | (new >> 8); } spin_unlock(&fat12_entry_lock); mark_buffer_dirty_inode(fatent->bhs[0], fatent->fat_inode); if (fatent->nr_bhs == 2) mark_buffer_dirty_inode(fatent->bhs[1], fatent->fat_inode); } static void fat16_ent_put(struct fat_entry *fatent, int new) { if (new == FAT_ENT_EOF) new = EOF_FAT16; *fatent->u.ent16_p = cpu_to_le16(new); mark_buffer_dirty_inode(fatent->bhs[0], fatent->fat_inode); } static void fat32_ent_put(struct fat_entry *fatent, int new) { WARN_ON(new & 0xf0000000); new |= le32_to_cpu(*fatent->u.ent32_p) & ~0x0fffffff; *fatent->u.ent32_p = cpu_to_le32(new); mark_buffer_dirty_inode(fatent->bhs[0], fatent->fat_inode); } static int fat12_ent_next(struct fat_entry *fatent) { u8 **ent12_p = fatent->u.ent12_p; struct buffer_head **bhs = fatent->bhs; u8 *nextp = ent12_p[1] + 1 + (fatent->entry & 1); fatent->entry++; if (fatent->nr_bhs == 1) { WARN_ON(ent12_p[0] > (u8 *)(bhs[0]->b_data + (bhs[0]->b_size - 2))); WARN_ON(ent12_p[1] > (u8 *)(bhs[0]->b_data + (bhs[0]->b_size - 1))); if (nextp < (u8 *)(bhs[0]->b_data + (bhs[0]->b_size - 1))) { ent12_p[0] = nextp - 1; ent12_p[1] = nextp; return 1; } } else { WARN_ON(ent12_p[0] != (u8 *)(bhs[0]->b_data + (bhs[0]->b_size - 1))); WARN_ON(ent12_p[1] != (u8 *)bhs[1]->b_data); ent12_p[0] = nextp - 1; ent12_p[1] = nextp; brelse(bhs[0]); bhs[0] = bhs[1]; fatent->nr_bhs = 1; return 1; } ent12_p[0] = NULL; ent12_p[1] = NULL; return 0; } static int fat16_ent_next(struct fat_entry *fatent) { const struct buffer_head *bh = fatent->bhs[0]; fatent->entry++; if (fatent->u.ent16_p < (__le16 *)(bh->b_data + (bh->b_size - 2))) { fatent->u.ent16_p++; return 1; } fatent->u.ent16_p = NULL; return 0; } static int fat32_ent_next(struct fat_entry *fatent) { const struct buffer_head *bh = fatent->bhs[0]; fatent->entry++; if (fatent->u.ent32_p < (__le32 *)(bh->b_data + (bh->b_size - 4))) { fatent->u.ent32_p++; return 1; } fatent->u.ent32_p = NULL; return 0; } static const struct fatent_operations fat12_ops = { .ent_blocknr = fat12_ent_blocknr, .ent_set_ptr = fat12_ent_set_ptr, .ent_bread = fat12_ent_bread, .ent_get = fat12_ent_get, .ent_put = fat12_ent_put, .ent_next = fat12_ent_next, }; static const struct fatent_operations fat16_ops = { .ent_blocknr = fat_ent_blocknr, .ent_set_ptr = fat16_ent_set_ptr, .ent_bread = fat_ent_bread, .ent_get = fat16_ent_get, .ent_put = fat16_ent_put, .ent_next = fat16_ent_next, }; static const struct fatent_operations fat32_ops = { .ent_blocknr = fat_ent_blocknr, .ent_set_ptr = fat32_ent_set_ptr, .ent_bread = fat_ent_bread, .ent_get = fat32_ent_get, .ent_put = fat32_ent_put, .ent_next = fat32_ent_next, }; static inline void lock_fat(struct msdos_sb_info *sbi) { mutex_lock(&sbi->fat_lock); } static inline void unlock_fat(struct msdos_sb_info *sbi) { mutex_unlock(&sbi->fat_lock); } void fat_ent_access_init(struct super_block *sb) { struct msdos_sb_info *sbi = MSDOS_SB(sb); mutex_init(&sbi->fat_lock); if (is_fat32(sbi)) { sbi->fatent_shift = 2; sbi->fatent_ops = &fat32_ops; } else if (is_fat16(sbi)) { sbi->fatent_shift = 1; sbi->fatent_ops = &fat16_ops; } else if (is_fat12(sbi)) { sbi->fatent_shift = -1; sbi->fatent_ops = &fat12_ops; } else { fat_fs_error(sb, "invalid FAT variant, %u bits", sbi->fat_bits); } } static void mark_fsinfo_dirty(struct super_block *sb) { struct msdos_sb_info *sbi = MSDOS_SB(sb); if (sb_rdonly(sb) || !is_fat32(sbi)) return; __mark_inode_dirty(sbi->fsinfo_inode, I_DIRTY_SYNC); } static inline int fat_ent_update_ptr(struct super_block *sb, struct fat_entry *fatent, int offset, sector_t blocknr) { struct msdos_sb_info *sbi = MSDOS_SB(sb); const struct fatent_operations *ops = sbi->fatent_ops; struct buffer_head **bhs = fatent->bhs; /* Is this fatent's blocks including this entry? */ if (!fatent->nr_bhs || bhs[0]->b_blocknr != blocknr) return 0; if (is_fat12(sbi)) { if ((offset + 1) < sb->s_blocksize) { /* This entry is on bhs[0]. */ if (fatent->nr_bhs == 2) { brelse(bhs[1]); fatent->nr_bhs = 1; } } else { /* This entry needs the next block. */ if (fatent->nr_bhs != 2) return 0; if (bhs[1]->b_blocknr != (blocknr + 1)) return 0; } } ops->ent_set_ptr(fatent, offset); return 1; } int fat_ent_read(struct inode *inode, struct fat_entry *fatent, int entry) { struct super_block *sb = inode->i_sb; struct msdos_sb_info *sbi = MSDOS_SB(inode->i_sb); const struct fatent_operations *ops = sbi->fatent_ops; int err, offset; sector_t blocknr; if (!fat_valid_entry(sbi, entry)) { fatent_brelse(fatent); fat_fs_error(sb, "invalid access to FAT (entry 0x%08x)", entry); return -EIO; } fatent_set_entry(fatent, entry); ops->ent_blocknr(sb, entry, &offset, &blocknr); if (!fat_ent_update_ptr(sb, fatent, offset, blocknr)) { fatent_brelse(fatent); err = ops->ent_bread(sb, fatent, offset, blocknr); if (err) return err; } return ops->ent_get(fatent); } /* FIXME: We can write the blocks as more big chunk. */ static int fat_mirror_bhs(struct super_block *sb, struct buffer_head **bhs, int nr_bhs) { struct msdos_sb_info *sbi = MSDOS_SB(sb); struct buffer_head *c_bh; int err, n, copy; err = 0; for (copy = 1; copy < sbi->fats; copy++) { sector_t backup_fat = sbi->fat_length * copy; for (n = 0; n < nr_bhs; n++) { c_bh = sb_getblk(sb, backup_fat + bhs[n]->b_blocknr); if (!c_bh) { err = -ENOMEM; goto error; } /* Avoid race with userspace read via bdev */ lock_buffer(c_bh); memcpy(c_bh->b_data, bhs[n]->b_data, sb->s_blocksize); set_buffer_uptodate(c_bh); unlock_buffer(c_bh); mark_buffer_dirty_inode(c_bh, sbi->fat_inode); if (sb->s_flags & SB_SYNCHRONOUS) err = sync_dirty_buffer(c_bh); brelse(c_bh); if (err) goto error; } } error: return err; } int fat_ent_write(struct inode *inode, struct fat_entry *fatent, int new, int wait) { struct super_block *sb = inode->i_sb; const struct fatent_operations *ops = MSDOS_SB(sb)->fatent_ops; int err; ops->ent_put(fatent, new); if (wait) { err = fat_sync_bhs(fatent->bhs, fatent->nr_bhs); if (err) return err; } return fat_mirror_bhs(sb, fatent->bhs, fatent->nr_bhs); } static inline int fat_ent_next(struct msdos_sb_info *sbi, struct fat_entry *fatent) { if (sbi->fatent_ops->ent_next(fatent)) { if (fatent->entry < sbi->max_cluster) return 1; } return 0; } static inline int fat_ent_read_block(struct super_block *sb, struct fat_entry *fatent) { const struct fatent_operations *ops = MSDOS_SB(sb)->fatent_ops; sector_t blocknr; int offset; fatent_brelse(fatent); ops->ent_blocknr(sb, fatent->entry, &offset, &blocknr); return ops->ent_bread(sb, fatent, offset, blocknr); } static void fat_collect_bhs(struct buffer_head **bhs, int *nr_bhs, struct fat_entry *fatent) { int n, i; for (n = 0; n < fatent->nr_bhs; n++) { for (i = 0; i < *nr_bhs; i++) { if (fatent->bhs[n] == bhs[i]) break; } if (i == *nr_bhs) { get_bh(fatent->bhs[n]); bhs[i] = fatent->bhs[n]; (*nr_bhs)++; } } } int fat_alloc_clusters(struct inode *inode, int *cluster, int nr_cluster) { struct super_block *sb = inode->i_sb; struct msdos_sb_info *sbi = MSDOS_SB(sb); const struct fatent_operations *ops = sbi->fatent_ops; struct fat_entry fatent, prev_ent; struct buffer_head *bhs[MAX_BUF_PER_PAGE]; int i, count, err, nr_bhs, idx_clus; BUG_ON(nr_cluster > (MAX_BUF_PER_PAGE / 2)); /* fixed limit */ lock_fat(sbi); if (sbi->free_clusters != -1 && sbi->free_clus_valid && sbi->free_clusters < nr_cluster) { unlock_fat(sbi); return -ENOSPC; } err = nr_bhs = idx_clus = 0; count = FAT_START_ENT; fatent_init(&prev_ent); fatent_init(&fatent); fatent_set_entry(&fatent, sbi->prev_free + 1); while (count < sbi->max_cluster) { if (fatent.entry >= sbi->max_cluster) fatent.entry = FAT_START_ENT; fatent_set_entry(&fatent, fatent.entry); err = fat_ent_read_block(sb, &fatent); if (err) goto out; /* Find the free entries in a block */ do { if (ops->ent_get(&fatent) == FAT_ENT_FREE) { int entry = fatent.entry; /* make the cluster chain */ ops->ent_put(&fatent, FAT_ENT_EOF); if (prev_ent.nr_bhs) ops->ent_put(&prev_ent, entry); fat_collect_bhs(bhs, &nr_bhs, &fatent); sbi->prev_free = entry; if (sbi->free_clusters != -1) sbi->free_clusters--; cluster[idx_clus] = entry; idx_clus++; if (idx_clus == nr_cluster) goto out; /* * fat_collect_bhs() gets ref-count of bhs, * so we can still use the prev_ent. */ prev_ent = fatent; } count++; if (count == sbi->max_cluster) break; } while (fat_ent_next(sbi, &fatent)); } /* Couldn't allocate the free entries */ sbi->free_clusters = 0; sbi->free_clus_valid = 1; err = -ENOSPC; out: unlock_fat(sbi); mark_fsinfo_dirty(sb); fatent_brelse(&fatent); if (!err) { if (inode_needs_sync(inode)) err = fat_sync_bhs(bhs, nr_bhs); if (!err) err = fat_mirror_bhs(sb, bhs, nr_bhs); } for (i = 0; i < nr_bhs; i++) brelse(bhs[i]); if (err && idx_clus) fat_free_clusters(inode, cluster[0]); return err; } int fat_free_clusters(struct inode *inode, int cluster) { struct super_block *sb = inode->i_sb; struct msdos_sb_info *sbi = MSDOS_SB(sb); const struct fatent_operations *ops = sbi->fatent_ops; struct fat_entry fatent; struct buffer_head *bhs[MAX_BUF_PER_PAGE]; int i, err, nr_bhs; int first_cl = cluster, dirty_fsinfo = 0; nr_bhs = 0; fatent_init(&fatent); lock_fat(sbi); do { cluster = fat_ent_read(inode, &fatent, cluster); if (cluster < 0) { err = cluster; goto error; } else if (cluster == FAT_ENT_FREE) { fat_fs_error(sb, "%s: deleting FAT entry beyond EOF", __func__); err = -EIO; goto error; } if (sbi->options.discard) { /* * Issue discard for the sectors we no longer * care about, batching contiguous clusters * into one request */ if (cluster != fatent.entry + 1) { int nr_clus = fatent.entry - first_cl + 1; sb_issue_discard(sb, fat_clus_to_blknr(sbi, first_cl), nr_clus * sbi->sec_per_clus, GFP_NOFS, 0); first_cl = cluster; } } ops->ent_put(&fatent, FAT_ENT_FREE); if (sbi->free_clusters != -1) { sbi->free_clusters++; dirty_fsinfo = 1; } if (nr_bhs + fatent.nr_bhs > MAX_BUF_PER_PAGE) { if (sb->s_flags & SB_SYNCHRONOUS) { err = fat_sync_bhs(bhs, nr_bhs); if (err) goto error; } err = fat_mirror_bhs(sb, bhs, nr_bhs); if (err) goto error; for (i = 0; i < nr_bhs; i++) brelse(bhs[i]); nr_bhs = 0; } fat_collect_bhs(bhs, &nr_bhs, &fatent); } while (cluster != FAT_ENT_EOF); if (sb->s_flags & SB_SYNCHRONOUS) { err = fat_sync_bhs(bhs, nr_bhs); if (err) goto error; } err = fat_mirror_bhs(sb, bhs, nr_bhs); error: fatent_brelse(&fatent); for (i = 0; i < nr_bhs; i++) brelse(bhs[i]); unlock_fat(sbi); if (dirty_fsinfo) mark_fsinfo_dirty(sb); return err; } EXPORT_SYMBOL_GPL(fat_free_clusters); struct fatent_ra { sector_t cur; sector_t limit; unsigned int ra_blocks; sector_t ra_advance; sector_t ra_next; sector_t ra_limit; }; static void fat_ra_init(struct super_block *sb, struct fatent_ra *ra, struct fat_entry *fatent, int ent_limit) { struct msdos_sb_info *sbi = MSDOS_SB(sb); const struct fatent_operations *ops = sbi->fatent_ops; sector_t blocknr, block_end; int offset; /* * This is the sequential read, so ra_pages * 2 (but try to * align the optimal hardware IO size). * [BTW, 128kb covers the whole sectors for FAT12 and FAT16] */ unsigned long ra_pages = sb->s_bdi->ra_pages; unsigned int reada_blocks; if (fatent->entry >= ent_limit) return; if (ra_pages > sb->s_bdi->io_pages) ra_pages = rounddown(ra_pages, sb->s_bdi->io_pages); reada_blocks = ra_pages << (PAGE_SHIFT - sb->s_blocksize_bits + 1); /* Initialize the range for sequential read */ ops->ent_blocknr(sb, fatent->entry, &offset, &blocknr); ops->ent_blocknr(sb, ent_limit - 1, &offset, &block_end); ra->cur = 0; ra->limit = (block_end + 1) - blocknr; /* Advancing the window at half size */ ra->ra_blocks = reada_blocks >> 1; ra->ra_advance = ra->cur; ra->ra_next = ra->cur; ra->ra_limit = ra->cur + min_t(sector_t, reada_blocks, ra->limit); } /* Assuming to be called before reading a new block (increments ->cur). */ static void fat_ent_reada(struct super_block *sb, struct fatent_ra *ra, struct fat_entry *fatent) { if (ra->ra_next >= ra->ra_limit) return; if (ra->cur >= ra->ra_advance) { struct msdos_sb_info *sbi = MSDOS_SB(sb); const struct fatent_operations *ops = sbi->fatent_ops; struct blk_plug plug; sector_t blocknr, diff; int offset; ops->ent_blocknr(sb, fatent->entry, &offset, &blocknr); diff = blocknr - ra->cur; blk_start_plug(&plug); /* * FIXME: we would want to directly use the bio with * pages to reduce the number of segments. */ for (; ra->ra_next < ra->ra_limit; ra->ra_next++) sb_breadahead(sb, ra->ra_next + diff); blk_finish_plug(&plug); /* Advance the readahead window */ ra->ra_advance += ra->ra_blocks; ra->ra_limit += min_t(sector_t, ra->ra_blocks, ra->limit - ra->ra_limit); } ra->cur++; } int fat_count_free_clusters(struct super_block *sb) { struct msdos_sb_info *sbi = MSDOS_SB(sb); const struct fatent_operations *ops = sbi->fatent_ops; struct fat_entry fatent; struct fatent_ra fatent_ra; int err = 0, free; lock_fat(sbi); if (sbi->free_clusters != -1 && sbi->free_clus_valid) goto out; free = 0; fatent_init(&fatent); fatent_set_entry(&fatent, FAT_START_ENT); fat_ra_init(sb, &fatent_ra, &fatent, sbi->max_cluster); while (fatent.entry < sbi->max_cluster) { /* readahead of fat blocks */ fat_ent_reada(sb, &fatent_ra, &fatent); err = fat_ent_read_block(sb, &fatent); if (err) goto out; do { if (ops->ent_get(&fatent) == FAT_ENT_FREE) free++; } while (fat_ent_next(sbi, &fatent)); cond_resched(); } sbi->free_clusters = free; sbi->free_clus_valid = 1; mark_fsinfo_dirty(sb); fatent_brelse(&fatent); out: unlock_fat(sbi); return err; } static int fat_trim_clusters(struct super_block *sb, u32 clus, u32 nr_clus) { struct msdos_sb_info *sbi = MSDOS_SB(sb); return sb_issue_discard(sb, fat_clus_to_blknr(sbi, clus), nr_clus * sbi->sec_per_clus, GFP_NOFS, 0); } int fat_trim_fs(struct inode *inode, struct fstrim_range *range) { struct super_block *sb = inode->i_sb; struct msdos_sb_info *sbi = MSDOS_SB(sb); const struct fatent_operations *ops = sbi->fatent_ops; struct fat_entry fatent; struct fatent_ra fatent_ra; u64 ent_start, ent_end, minlen, trimmed = 0; u32 free = 0; int err = 0; /* * FAT data is organized as clusters, trim at the granulary of cluster. * * fstrim_range is in byte, convert vaules to cluster index. * Treat sectors before data region as all used, not to trim them. */ ent_start = max_t(u64, range->start>>sbi->cluster_bits, FAT_START_ENT); ent_end = ent_start + (range->len >> sbi->cluster_bits) - 1; minlen = range->minlen >> sbi->cluster_bits; if (ent_start >= sbi->max_cluster || range->len < sbi->cluster_size) return -EINVAL; if (ent_end >= sbi->max_cluster) ent_end = sbi->max_cluster - 1; fatent_init(&fatent); lock_fat(sbi); fatent_set_entry(&fatent, ent_start); fat_ra_init(sb, &fatent_ra, &fatent, ent_end + 1); while (fatent.entry <= ent_end) { /* readahead of fat blocks */ fat_ent_reada(sb, &fatent_ra, &fatent); err = fat_ent_read_block(sb, &fatent); if (err) goto error; do { if (ops->ent_get(&fatent) == FAT_ENT_FREE) { free++; } else if (free) { if (free >= minlen) { u32 clus = fatent.entry - free; err = fat_trim_clusters(sb, clus, free); if (err && err != -EOPNOTSUPP) goto error; if (!err) trimmed += free; err = 0; } free = 0; } } while (fat_ent_next(sbi, &fatent) && fatent.entry <= ent_end); if (fatal_signal_pending(current)) { err = -ERESTARTSYS; goto error; } if (need_resched()) { fatent_brelse(&fatent); unlock_fat(sbi); cond_resched(); lock_fat(sbi); } } /* handle scenario when tail entries are all free */ if (free && free >= minlen) { u32 clus = fatent.entry - free; err = fat_trim_clusters(sb, clus, free); if (err && err != -EOPNOTSUPP) goto error; if (!err) trimmed += free; err = 0; } error: fatent_brelse(&fatent); unlock_fat(sbi); range->len = trimmed << sbi->cluster_bits; return err; }
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 /* SPDX-License-Identifier: GPL-2.0 */ #ifndef _INET_ECN_H_ #define _INET_ECN_H_ #include <linux/ip.h> #include <linux/skbuff.h> #include <linux/if_vlan.h> #include <net/inet_sock.h> #include <net/dsfield.h> enum { INET_ECN_NOT_ECT = 0, INET_ECN_ECT_1 = 1, INET_ECN_ECT_0 = 2, INET_ECN_CE = 3, INET_ECN_MASK = 3, }; extern int sysctl_tunnel_ecn_log; static inline int INET_ECN_is_ce(__u8 dsfield) { return (dsfield & INET_ECN_MASK) == INET_ECN_CE; } static inline int INET_ECN_is_not_ect(__u8 dsfield) { return (dsfield & INET_ECN_MASK) == INET_ECN_NOT_ECT; } static inline int INET_ECN_is_capable(__u8 dsfield) { return dsfield & INET_ECN_ECT_0; } /* * RFC 3168 9.1.1 * The full-functionality option for ECN encapsulation is to copy the * ECN codepoint of the inside header to the outside header on * encapsulation if the inside header is not-ECT or ECT, and to set the * ECN codepoint of the outside header to ECT(0) if the ECN codepoint of * the inside header is CE. */ static inline __u8 INET_ECN_encapsulate(__u8 outer, __u8 inner) { outer &= ~INET_ECN_MASK; outer |= !INET_ECN_is_ce(inner) ? (inner & INET_ECN_MASK) : INET_ECN_ECT_0; return outer; } static inline void INET_ECN_xmit(struct sock *sk) { inet_sk(sk)->tos |= INET_ECN_ECT_0; if (inet6_sk(sk) != NULL) inet6_sk(sk)->tclass |= INET_ECN_ECT_0; } static inline void INET_ECN_dontxmit(struct sock *sk) { inet_sk(sk)->tos &= ~INET_ECN_MASK; if (inet6_sk(sk) != NULL) inet6_sk(sk)->tclass &= ~INET_ECN_MASK; } #define IP6_ECN_flow_init(label) do { \ (label) &= ~htonl(INET_ECN_MASK << 20); \ } while (0) #define IP6_ECN_flow_xmit(sk, label) do { \ if (INET_ECN_is_capable(inet6_sk(sk)->tclass)) \ (label) |= htonl(INET_ECN_ECT_0 << 20); \ } while (0) static inline int IP_ECN_set_ce(struct iphdr *iph) { u32 check = (__force u32)iph->check; u32 ecn = (iph->tos + 1) & INET_ECN_MASK; /* * After the last operation we have (in binary): * INET_ECN_NOT_ECT => 01 * INET_ECN_ECT_1 => 10 * INET_ECN_ECT_0 => 11 * INET_ECN_CE => 00 */ if (!(ecn & 2)) return !ecn; /* * The following gives us: * INET_ECN_ECT_1 => check += htons(0xFFFD) * INET_ECN_ECT_0 => check += htons(0xFFFE) */ check += (__force u16)htons(0xFFFB) + (__force u16)htons(ecn); iph->check = (__force __sum16)(check + (check>=0xFFFF)); iph->tos |= INET_ECN_CE; return 1; } static inline int IP_ECN_set_ect1(struct iphdr *iph) { u32 check = (__force u32)iph->check; if ((iph->tos & INET_ECN_MASK) != INET_ECN_ECT_0) return 0; check += (__force u16)htons(0x1); iph->check = (__force __sum16)(check + (check>=0xFFFF)); iph->tos ^= INET_ECN_MASK; return 1; } static inline void IP_ECN_clear(struct iphdr *iph) { iph->tos &= ~INET_ECN_MASK; } static inline void ipv4_copy_dscp(unsigned int dscp, struct iphdr *inner) { dscp &= ~INET_ECN_MASK; ipv4_change_dsfield(inner, INET_ECN_MASK, dscp); } struct ipv6hdr; /* Note: * IP_ECN_set_ce() has to tweak IPV4 checksum when setting CE, * meaning both changes have no effect on skb->csum if/when CHECKSUM_COMPLETE * In IPv6 case, no checksum compensates the change in IPv6 header, * so we have to update skb->csum. */ static inline int IP6_ECN_set_ce(struct sk_buff *skb, struct ipv6hdr *iph) { __be32 from, to; if (INET_ECN_is_not_ect(ipv6_get_dsfield(iph))) return 0; from = *(__be32 *)iph; to = from | htonl(INET_ECN_CE << 20); *(__be32 *)iph = to; if (skb->ip_summed == CHECKSUM_COMPLETE) skb->csum = csum_add(csum_sub(skb->csum, (__force __wsum)from), (__force __wsum)to); return 1; } static inline int IP6_ECN_set_ect1(struct sk_buff *skb, struct ipv6hdr *iph) { __be32 from, to; if ((ipv6_get_dsfield(iph) & INET_ECN_MASK) != INET_ECN_ECT_0) return 0; from = *(__be32 *)iph; to = from ^ htonl(INET_ECN_MASK << 20); *(__be32 *)iph = to; if (skb->ip_summed == CHECKSUM_COMPLETE) skb->csum = csum_add(csum_sub(skb->csum, (__force __wsum)from), (__force __wsum)to); return 1; } static inline void ipv6_copy_dscp(unsigned int dscp, struct ipv6hdr *inner) { dscp &= ~INET_ECN_MASK; ipv6_change_dsfield(inner, INET_ECN_MASK, dscp); } static inline int INET_ECN_set_ce(struct sk_buff *skb) { switch (skb_protocol(skb, true)) { case cpu_to_be16(ETH_P_IP): if (skb_network_header(skb) + sizeof(struct iphdr) <= skb_tail_pointer(skb)) return IP_ECN_set_ce(ip_hdr(skb)); break; case cpu_to_be16(ETH_P_IPV6): if (skb_network_header(skb) + sizeof(struct ipv6hdr) <= skb_tail_pointer(skb)) return IP6_ECN_set_ce(skb, ipv6_hdr(skb)); break; } return 0; } static inline int INET_ECN_set_ect1(struct sk_buff *skb) { switch (skb_protocol(skb, true)) { case cpu_to_be16(ETH_P_IP): if (skb_network_header(skb) + sizeof(struct iphdr) <= skb_tail_pointer(skb)) return IP_ECN_set_ect1(ip_hdr(skb)); break; case cpu_to_be16(ETH_P_IPV6): if (skb_network_header(skb) + sizeof(struct ipv6hdr) <= skb_tail_pointer(skb)) return IP6_ECN_set_ect1(skb, ipv6_hdr(skb)); break; } return 0; } /* * RFC 6040 4.2 * To decapsulate the inner header at the tunnel egress, a compliant * tunnel egress MUST set the outgoing ECN field to the codepoint at the * intersection of the appropriate arriving inner header (row) and outer * header (column) in Figure 4 * * +---------+------------------------------------------------+ * |Arriving | Arriving Outer Header | * | Inner +---------+------------+------------+------------+ * | Header | Not-ECT | ECT(0) | ECT(1) | CE | * +---------+---------+------------+------------+------------+ * | Not-ECT | Not-ECT |Not-ECT(!!!)|Not-ECT(!!!)| <drop>(!!!)| * | ECT(0) | ECT(0) | ECT(0) | ECT(1) | CE | * | ECT(1) | ECT(1) | ECT(1) (!) | ECT(1) | CE | * | CE | CE | CE | CE(!!!)| CE | * +---------+---------+------------+------------+------------+ * * Figure 4: New IP in IP Decapsulation Behaviour * * returns 0 on success * 1 if something is broken and should be logged (!!! above) * 2 if packet should be dropped */ static inline int __INET_ECN_decapsulate(__u8 outer, __u8 inner, bool *set_ce) { if (INET_ECN_is_not_ect(inner)) { switch (outer & INET_ECN_MASK) { case INET_ECN_NOT_ECT: return 0; case INET_ECN_ECT_0: case INET_ECN_ECT_1: return 1; case INET_ECN_CE: return 2; } } *set_ce = INET_ECN_is_ce(outer); return 0; } static inline int INET_ECN_decapsulate(struct sk_buff *skb, __u8 outer, __u8 inner) { bool set_ce = false; int rc; rc = __INET_ECN_decapsulate(outer, inner, &set_ce); if (!rc) { if (set_ce) INET_ECN_set_ce(skb); else if ((outer & INET_ECN_MASK) == INET_ECN_ECT_1) INET_ECN_set_ect1(skb); } return rc; } static inline int IP_ECN_decapsulate(const struct iphdr *oiph, struct sk_buff *skb) { __u8 inner; switch (skb_protocol(skb, true)) { case htons(ETH_P_IP): inner = ip_hdr(skb)->tos; break; case htons(ETH_P_IPV6): inner = ipv6_get_dsfield(ipv6_hdr(skb)); break; default: return 0; } return INET_ECN_decapsulate(skb, oiph->tos, inner); } static inline int IP6_ECN_decapsulate(const struct ipv6hdr *oipv6h, struct sk_buff *skb) { __u8 inner; switch (skb_protocol(skb, true)) { case htons(ETH_P_IP): inner = ip_hdr(skb)->tos; break; case htons(ETH_P_IPV6): inner = ipv6_get_dsfield(ipv6_hdr(skb)); break; default: return 0; } return INET_ECN_decapsulate(skb, ipv6_get_dsfield(oipv6h), inner); } #endif
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 /* SPDX-License-Identifier: GPL-2.0 */ #ifndef _ASM_X86_LOCAL_H #define _ASM_X86_LOCAL_H #include <linux/percpu.h> #include <linux/atomic.h> #include <asm/asm.h> typedef struct { atomic_long_t a; } local_t; #define LOCAL_INIT(i) { ATOMIC_LONG_INIT(i) } #define local_read(l) atomic_long_read(&(l)->a) #define local_set(l, i) atomic_long_set(&(l)->a, (i)) static inline void local_inc(local_t *l) { asm volatile(_ASM_INC "%0" : "+m" (l->a.counter)); } static inline void local_dec(local_t *l) { asm volatile(_ASM_DEC "%0" : "+m" (l->a.counter)); } static inline void local_add(long i, local_t *l) { asm volatile(_ASM_ADD "%1,%0" : "+m" (l->a.counter) : "ir" (i)); } static inline void local_sub(long i, local_t *l) { asm volatile(_ASM_SUB "%1,%0" : "+m" (l->a.counter) : "ir" (i)); } /** * local_sub_and_test - subtract value from variable and test result * @i: integer value to subtract * @l: pointer to type local_t * * Atomically subtracts @i from @l and returns * true if the result is zero, or false for all * other cases. */ static inline bool local_sub_and_test(long i, local_t *l) { return GEN_BINARY_RMWcc(_ASM_SUB, l->a.counter, e, "er", i); } /** * local_dec_and_test - decrement and test * @l: pointer to type local_t * * Atomically decrements @l by 1 and * returns true if the result is 0, or false for all other * cases. */ static inline bool local_dec_and_test(local_t *l) { return GEN_UNARY_RMWcc(_ASM_DEC, l->a.counter, e); } /** * local_inc_and_test - increment and test * @l: pointer to type local_t * * Atomically increments @l by 1 * and returns true if the result is zero, or false for all * other cases. */ static inline bool local_inc_and_test(local_t *l) { return GEN_UNARY_RMWcc(_ASM_INC, l->a.counter, e); } /** * local_add_negative - add and test if negative * @i: integer value to add * @l: pointer to type local_t * * Atomically adds @i to @l and returns true * if the result is negative, or false when * result is greater than or equal to zero. */ static inline bool local_add_negative(long i, local_t *l) { return GEN_BINARY_RMWcc(_ASM_ADD, l->a.counter, s, "er", i); } /** * local_add_return - add and return * @i: integer value to add * @l: pointer to type local_t * * Atomically adds @i to @l and returns @i + @l */ static inline long local_add_return(long i, local_t *l) { long __i = i; asm volatile(_ASM_XADD "%0, %1;" : "+r" (i), "+m" (l->a.counter) : : "memory"); return i + __i; } static inline long local_sub_return(long i, local_t *l) { return local_add_return(-i, l); } #define local_inc_return(l) (local_add_return(1, l)) #define local_dec_return(l) (local_sub_return(1, l)) #define local_cmpxchg(l, o, n) \ (cmpxchg_local(&((l)->a.counter), (o), (n))) /* Always has a lock prefix */ #define local_xchg(l, n) (xchg(&((l)->a.counter), (n))) /** * local_add_unless - add unless the number is a given value * @l: pointer of type local_t * @a: the amount to add to l... * @u: ...unless l is equal to u. * * Atomically adds @a to @l, so long as it was not @u. * Returns non-zero if @l was not @u, and zero otherwise. */ #define local_add_unless(l, a, u) \ ({ \ long c, old; \ c = local_read((l)); \ for (;;) { \ if (unlikely(c == (u))) \ break; \ old = local_cmpxchg((l), c, c + (a)); \ if (likely(old == c)) \ break; \ c = old; \ } \ c != (u); \ }) #define local_inc_not_zero(l) local_add_unless((l), 1, 0) /* On x86_32, these are no better than the atomic variants. * On x86-64 these are better than the atomic variants on SMP kernels * because they dont use a lock prefix. */ #define __local_inc(l) local_inc(l) #define __local_dec(l) local_dec(l) #define __local_add(i, l) local_add((i), (l)) #define __local_sub(i, l) local_sub((i), (l)) #endif /* _ASM_X86_LOCAL_H */
7 6 2 4 5 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 // SPDX-License-Identifier: GPL-2.0 /* * linux/kernel/capability.c * * Copyright (C) 1997 Andrew Main <zefram@fysh.org> * * Integrated into 2.1.97+, Andrew G. Morgan <morgan@kernel.org> * 30 May 2002: Cleanup, Robert M. Love <rml@tech9.net> */ #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt #include <linux/audit.h> #include <linux/capability.h> #include <linux/mm.h> #include <linux/export.h> #include <linux/security.h> #include <linux/syscalls.h> #include <linux/pid_namespace.h> #include <linux/user_namespace.h> #include <linux/uaccess.h> /* * Leveraged for setting/resetting capabilities */ const kernel_cap_t __cap_empty_set = CAP_EMPTY_SET; EXPORT_SYMBOL(__cap_empty_set); int file_caps_enabled = 1; static int __init file_caps_disable(char *str) { file_caps_enabled = 0; return 1; } __setup("no_file_caps", file_caps_disable); #ifdef CONFIG_MULTIUSER /* * More recent versions of libcap are available from: * * http://www.kernel.org/pub/linux/libs/security/linux-privs/ */ static void warn_legacy_capability_use(void) { char name[sizeof(current->comm)]; pr_info_once("warning: `%s' uses 32-bit capabilities (legacy support in use)\n", get_task_comm(name, current)); } /* * Version 2 capabilities worked fine, but the linux/capability.h file * that accompanied their introduction encouraged their use without * the necessary user-space source code changes. As such, we have * created a version 3 with equivalent functionality to version 2, but * with a header change to protect legacy source code from using * version 2 when it wanted to use version 1. If your system has code * that trips the following warning, it is using version 2 specific * capabilities and may be do