diff --git a/examples/BuddyLlama/llama-main.cpp b/examples/BuddyLlama/llama-main.cpp index 3e73dc37..8ebfe740 100644 --- a/examples/BuddyLlama/llama-main.cpp +++ b/examples/BuddyLlama/llama-main.cpp @@ -129,9 +129,7 @@ int main() { // - Output container. // - Parameters container. Text outputContainer; - MemRef resultContainer[2] = { - MemRef({1, MaxTokenLength, HiddenSize}, false, 0), - MemRef({1, MaxTokenLength, MaxVocabSize}, false, 0)}; + MemRef resultContainer({1, MaxTokenLength, HiddenSize}, false, 0); Text inputContainer(inputStr); MemRef paramsContainer({ParamsSize}); @@ -151,7 +149,7 @@ int main() { for (int i = 0; i < generateLen; i++) { const auto inferenceStart = std::chrono::high_resolution_clock::now(); // Execute the forward pass of the model. - _mlir_ciface_forward(resultContainer, ¶msContainer, &inputContainer); + _mlir_ciface_forward(&resultContainer, ¶msContainer, &inputContainer); const auto inferenceEnd = std::chrono::high_resolution_clock::now(); const std::chrono::duration inferenceTime = @@ -160,7 +158,7 @@ int main() { // Determine the generated token. int tokenIndex = inputContainer.getTokenCnt() - 1; const float *startPtr = - resultContainer[1].getData() + tokenIndex * MaxVocabSize; + resultContainer.getData() + tokenIndex * MaxVocabSize; const float *endPtr = startPtr + MaxVocabSize; int maxIndex = findMaxIndex(startPtr, endPtr); std::string tok = inputContainer.getStr(maxIndex); @@ -175,8 +173,7 @@ int main() { // Append the generated token into the input and output container. 
inputContainer.appendTokenIdx(maxIndex); outputContainer.appendTokenIdx(maxIndex); - free(resultContainer[0].release()); - free(resultContainer[1].release()); + free(resultContainer.release()); } /// Print the final result diff --git a/examples/BuddyLlama/op.txt b/examples/BuddyLlama/op.txt new file mode 100644 index 00000000..24920f4e --- /dev/null +++ b/examples/BuddyLlama/op.txt @@ -0,0 +1,2907 @@ +0: +1: +2: +3: +4: +5: +6: +7: +8: +9: +10: +11: +12: +13: +14: +15: +16: +17: +18: +19: +20: +21: +22: +23: +24: +25: +26: +27: +28: +29: +30: +31: +32: +33: +34: +35: +36: +37: +38: +39: +40: +41: +42: +43: +44: +45: +46: +47: +48: +49: +50: +51: +52: +53: +54: +55: +56: +57: +58: +59: +60: +61: +62: +63: +64: +65: +66: +67: +68: +69: +70: +71: +72: +73: +74: +75: +76: +77: +78: +79: +80: +81: +82: +83: +84: +85: +86: +87: +88: +89: +90: +91: +92: +93: +94: +95: +96: +97: +98: +99: +100: +101: +102: +103: +104: +105: +106: +107: +108: +109: +110: +111: +112: +113: +114: +115: +116: +117: +118: +119: +120: +121: +122: +123: +124: +125: +126: +127: +128: +129: +130: +131: +132: +133: +134: +135: +136: +137: +138: +139: +140: +141: +142: +143: +144: +145: +146: +147: +148: +149: +150: +151: +152: +153: +154: +155: +156: +157: +158: +159: +160: +161: +162: +163: +164: +165: +166: +167: +168: +169: +170: +171: +172: +173: +174: +175: +176: +177: +178: +179: +180: +181: +182: +183: +184: +185: +186: +187: +188: +189: +190: +191: +192: +193: +194: +195: +196: +197: +198: +199: +200: +201: +202: +203: +204: +205: +206: +207: +208: +209: +210: +211: +212: +213: +214: +215: +216: +217: +218: +219: +220: +221: +222: +223: +224: +225: +226: +227: +228: +229: +230: +231: +232: +233: +234: +235: +236: +237: +238: +239: +240: +241: +242: +243: +244: +245: +246: +247: +248: +249: +250: +251: +252: +253: +254: +255: +256: +257: +258: +259: +260: +261: +262: +263: +264: +265: +266: +267: +268: +269: +270: +271: +272: +273: +274: +275: +276: +277: +278: +279: +280: +281: +282: +283: 
+284: +285: +286: +287: +288: +289: +290: +291: +292: +293: +294: +295: +296: +297: +298: +299: +300: +301: +302: +303: +304: +305: +306: +307: +308: +309: +310: +311: +312: +313: +314: +315: +316: +317: +318: +319: +320: +321: +322: +323: +324: +325: +326: +327: +328: +329: +330: +331: +332: +333: // RMSNorm +334: +335: +336: +337: +338: +339: +340: +341: +342: +343: +344: +345: +346: +347: +348: +349: +350: +351: +352: +353: +354: +355: +356: +357: +358: +359: +360: +361: +362: +363: +364: +365: +366: +367: +368: +369: +370: +371: +372: +373: +374: +375: +376: +377: +378: +379: +380: +381: +382: +383: +384: +385: +386: +387: +388: +389: +390: +391: // +392: +393: +394: +395: +396: +397: +398: +399: +400: +401: +402: +403: +404: +405: +406: +407: +408: +409: +410: +411: +412: +413: +414: +415: +416: +417: +418: +419: +420: +421: +422: +423: +424: +425: +426: +427: +428: +429: +430: +431: +432: +433: +434: +435: +436: +437: +438: +439: +440: +441: +442: +443: +444: +445: +446: +447: +448: +449: +450: +451: +452: +453: +454: +455: +456: +457: +458: +459: +460: +461: +462: +463: +464: +465: +466: +467: +468: +469: +470: +471: +472: +473: +474: +475: +476: +477: +478: +479: +480: +481: +482: +483: +484: +485: +486: +487: +488: +489: +490: +491: +492: +493: +494: +495: +496: +497: +498: +499: +500: +501: +502: +503: +504: +505: +506: +507: +508: +509: +510: +511: +512: +513: +514: +515: +516: +517: +518: +519: +520: +521: +522: +523: +524: +525: +526: +527: +528: +529: +530: +531: +532: +533: +534: +535: +536: +537: +538: +539: +540: +541: +542: +543: +544: +545: +546: +547: +548: +549: +550: +551: +552: +553: +554: +555: +556: +557: +558: +559: +560: +561: +562: +563: +564: +565: +566: +567: +568: +569: +570: +571: +572: +573: +574: +575: +576: +577: +578: +579: +580: +581: +582: +583: +584: +585: +586: +587: +588: +589: +590: +591: +592: +593: +594: +595: +596: +597: +598: +599: +600: +601: +602: +603: +604: +605: +606: +607: +608: +609: +610: +611: +612: +613: +614: 
+615: +616: +617: +618: +619: +620: +621: +622: +623: +624: +625: +626: +627: +628: +629: +630: +631: +632: +633: +634: +635: +636: +637: +638: +639: +640: +641: +642: +643: +644: +645: +646: +647: +648: +649: +650: +651: +652: +653: +654: +655: +656: +657: +658: +659: +660: +661: +662: +663: +664: +665: +666: +667: +668: +669: +670: +671: +672: +673: +674: +675: +676: +677: +678: +679: +680: +681: +682: +683: +684: +685: +686: +687: +688: +689: +690: +691: +692: +693: +694: +695: +696: +697: +698: +699: +700: +701: +702: +703: +704: +705: +706: +707: +708: +709: +710: +711: +712: +713: +714: +715: +716: +717: +718: +719: +720: +721: +722: +723: +724: +725: +726: +727: +728: +729: +730: +731: +732: +733: +734: +735: +736: +737: +738: +739: +740: +741: +742: +743: +744: +745: +746: +747: +748: +749: +750: +751: +752: +753: +754: +755: +756: +757: +758: +759: +760: +761: +762: +763: +764: +765: +766: +767: +768: +769: +770: +771: +772: +773: +774: +775: +776: +777: +778: +779: +780: +781: +782: +783: +784: +785: +786: +787: +788: +789: +790: +791: +792: +793: +794: +795: +796: +797: +798: +799: +800: +801: +802: +803: +804: +805: +806: +807: +808: +809: +810: +811: +812: +813: +814: +815: +816: +817: +818: +819: +820: +821: +822: +823: +824: +825: +826: +827: +828: +829: +830: +831: +832: +833: +834: +835: +836: +837: +838: +839: +840: +841: +842: +843: +844: +845: +846: +847: +848: +849: +850: +851: +852: +853: +854: +855: +856: +857: +858: +859: +860: +861: +862: +863: +864: +865: +866: +867: +868: +869: +870: +871: +872: +873: +874: +875: +876: +877: +878: +879: +880: +881: +882: +883: +884: +885: +886: +887: +888: +889: +890: +891: +892: +893: +894: +895: +896: +897: +898: +899: +900: +901: +902: +903: +904: +905: +906: +907: +908: +909: +910: +911: +912: +913: +914: +915: +916: +917: +918: +919: +920: +921: +922: +923: +924: +925: +926: +927: +928: +929: +930: +931: +932: +933: +934: +935: +936: +937: +938: +939: +940: +941: +942: +943: +944: +945: +946: +947: 
+948: +949: +950: +951: +952: +953: +954: +955: +956: +957: +958: +959: +960: +961: +962: +963: +964: +965: +966: +967: +968: +969: +970: +971: +972: +973: +974: +975: +976: +977: +978: +979: +980: +981: +982: +983: +984: +985: +986: +987: +988: +989: +990: +991: +992: +993: +994: +995: +996: +997: +998: +999: +1000: +1001: +1002: +1003: +1004: +1005: +1006: +1007: +1008: +1009: +1010: +1011: +1012: +1013: +1014: +1015: +1016: +1017: +1018: +1019: +1020: +1021: +1022: +1023: +1024: +1025: +1026: +1027: +1028: +1029: +1030: +1031: +1032: +1033: +1034: +1035: +1036: +1037: +1038: +1039: +1040: +1041: +1042: +1043: +1044: +1045: +1046: +1047: +1048: +1049: +1050: +1051: +1052: +1053: +1054: +1055: +1056: +1057: +1058: +1059: +1060: +1061: +1062: +1063: +1064: +1065: +1066: +1067: +1068: +1069: +1070: +1071: +1072: +1073: +1074: +1075: +1076: +1077: +1078: +1079: +1080: +1081: +1082: +1083: +1084: +1085: +1086: +1087: +1088: +1089: +1090: +1091: +1092: +1093: +1094: +1095: +1096: +1097: +1098: +1099: +1100: +1101: +1102: +1103: +1104: +1105: +1106: +1107: +1108: +1109: +1110: +1111: +1112: +1113: +1114: +1115: +1116: +1117: +1118: +1119: +1120: +1121: +1122: +1123: +1124: +1125: +1126: +1127: +1128: +1129: +1130: +1131: +1132: +1133: +1134: +1135: +1136: +1137: +1138: +1139: +1140: +1141: +1142: +1143: +1144: +1145: +1146: +1147: +1148: +1149: +1150: +1151: +1152: +1153: +1154: +1155: +1156: +1157: +1158: +1159: +1160: +1161: +1162: +1163: +1164: +1165: +1166: +1167: +1168: +1169: +1170: +1171: +1172: +1173: +1174: +1175: +1176: +1177: +1178: +1179: +1180: +1181: +1182: +1183: +1184: +1185: +1186: +1187: +1188: +1189: +1190: +1191: +1192: +1193: +1194: +1195: +1196: +1197: +1198: +1199: +1200: +1201: +1202: +1203: +1204: +1205: +1206: +1207: +1208: +1209: +1210: +1211: +1212: +1213: +1214: +1215: +1216: +1217: +1218: +1219: +1220: +1221: +1222: +1223: +1224: +1225: +1226: +1227: +1228: +1229: +1230: +1231: +1232: +1233: +1234: +1235: +1236: +1237: +1238: +1239: +1240: 
+1241: +1242: +1243: +1244: +1245: +1246: +1247: +1248: +1249: +1250: +1251: +1252: +1253: +1254: +1255: +1256: +1257: +1258: +1259: +1260: +1261: +1262: +1263: +1264: +1265: +1266: +1267: +1268: +1269: +1270: +1271: +1272: +1273: +1274: +1275: +1276: +1277: +1278: +1279: +1280: +1281: +1282: +1283: +1284: +1285: +1286: +1287: +1288: +1289: +1290: +1291: +1292: +1293: +1294: +1295: +1296: +1297: +1298: +1299: +1300: +1301: +1302: +1303: +1304: +1305: +1306: +1307: +1308: +1309: +1310: +1311: +1312: +1313: +1314: +1315: +1316: +1317: +1318: +1319: +1320: +1321: +1322: +1323: +1324: +1325: +1326: +1327: +1328: +1329: +1330: +1331: +1332: +1333: +1334: +1335: +1336: +1337: +1338: +1339: +1340: +1341: +1342: +1343: +1344: +1345: +1346: +1347: +1348: +1349: +1350: +1351: +1352: +1353: +1354: +1355: +1356: +1357: +1358: +1359: +1360: +1361: +1362: +1363: +1364: +1365: +1366: +1367: +1368: +1369: +1370: +1371: +1372: +1373: +1374: +1375: +1376: +1377: +1378: +1379: +1380: +1381: +1382: +1383: +1384: +1385: +1386: +1387: +1388: +1389: +1390: +1391: +1392: +1393: +1394: +1395: +1396: +1397: +1398: +1399: +1400: +1401: +1402: +1403: +1404: +1405: +1406: +1407: +1408: +1409: +1410: +1411: +1412: +1413: +1414: +1415: +1416: +1417: +1418: +1419: +1420: +1421: +1422: +1423: +1424: +1425: +1426: +1427: +1428: +1429: +1430: +1431: +1432: +1433: +1434: +1435: +1436: +1437: +1438: +1439: +1440: +1441: +1442: +1443: +1444: +1445: +1446: +1447: +1448: +1449: +1450: +1451: +1452: +1453: +1454: +1455: +1456: +1457: +1458: +1459: +1460: +1461: +1462: +1463: +1464: +1465: +1466: +1467: +1468: +1469: +1470: +1471: +1472: +1473: +1474: +1475: +1476: +1477: +1478: +1479: +1480: +1481: +1482: +1483: +1484: +1485: +1486: +1487: +1488: +1489: +1490: +1491: +1492: +1493: +1494: +1495: +1496: +1497: +1498: +1499: +1500: +1501: +1502: +1503: +1504: +1505: +1506: +1507: +1508: +1509: +1510: +1511: +1512: +1513: +1514: +1515: +1516: +1517: +1518: +1519: +1520: +1521: +1522: +1523: +1524: +1525: 
+1526: +1527: +1528: +1529: +1530: +1531: +1532: +1533: +1534: +1535: +1536: +1537: +1538: +1539: +1540: +1541: +1542: +1543: +1544: +1545: +1546: +1547: +1548: +1549: +1550: +1551: +1552: +1553: +1554: +1555: +1556: +1557: +1558: +1559: +1560: +1561: +1562: +1563: +1564: +1565: +1566: +1567: +1568: +1569: +1570: +1571: +1572: +1573: +1574: +1575: +1576: +1577: +1578: +1579: +1580: +1581: +1582: +1583: +1584: +1585: +1586: +1587: +1588: +1589: +1590: +1591: +1592: +1593: +1594: +1595: +1596: +1597: +1598: +1599: +1600: +1601: +1602: +1603: +1604: +1605: +1606: +1607: +1608: +1609: +1610: +1611: +1612: +1613: +1614: +1615: +1616: +1617: +1618: +1619: +1620: +1621: +1622: +1623: +1624: +1625: +1626: +1627: +1628: +1629: +1630: +1631: +1632: +1633: +1634: +1635: +1636: +1637: +1638: +1639: +1640: +1641: +1642: +1643: +1644: +1645: +1646: +1647: +1648: +1649: +1650: +1651: +1652: +1653: +1654: +1655: +1656: +1657: +1658: +1659: +1660: +1661: +1662: +1663: +1664: +1665: +1666: +1667: +1668: +1669: +1670: +1671: +1672: +1673: +1674: +1675: +1676: +1677: +1678: +1679: +1680: +1681: +1682: +1683: +1684: +1685: +1686: +1687: +1688: +1689: +1690: +1691: +1692: +1693: +1694: +1695: +1696: +1697: +1698: +1699: +1700: +1701: +1702: +1703: +1704: +1705: +1706: +1707: +1708: +1709: +1710: +1711: +1712: +1713: +1714: +1715: +1716: +1717: +1718: +1719: +1720: +1721: +1722: +1723: +1724: +1725: +1726: +1727: +1728: +1729: +1730: +1731: +1732: +1733: +1734: +1735: +1736: +1737: +1738: +1739: +1740: +1741: +1742: +1743: +1744: +1745: +1746: +1747: +1748: +1749: +1750: +1751: +1752: +1753: +1754: +1755: +1756: +1757: +1758: +1759: +1760: +1761: +1762: +1763: +1764: +1765: +1766: +1767: +1768: +1769: +1770: +1771: +1772: +1773: +1774: +1775: +1776: +1777: +1778: +1779: +1780: +1781: +1782: +1783: +1784: +1785: +1786: +1787: +1788: +1789: +1790: +1791: +1792: +1793: +1794: +1795: +1796: +1797: +1798: +1799: +1800: +1801: +1802: +1803: +1804: +1805: +1806: +1807: +1808: +1809: +1810: 
+1811: +1812: +1813: +1814: +1815: +1816: +1817: +1818: +1819: +1820: +1821: +1822: +1823: +1824: +1825: +1826: +1827: +1828: +1829: +1830: +1831: +1832: +1833: +1834: +1835: +1836: +1837: +1838: +1839: +1840: +1841: +1842: +1843: +1844: +1845: +1846: +1847: +1848: +1849: +1850: +1851: +1852: +1853: +1854: +1855: +1856: +1857: +1858: +1859: +1860: +1861: +1862: +1863: +1864: +1865: +1866: +1867: +1868: +1869: +1870: +1871: +1872: +1873: +1874: +1875: +1876: +1877: +1878: +1879: +1880: +1881: +1882: +1883: +1884: +1885: +1886: +1887: +1888: +1889: +1890: +1891: +1892: +1893: +1894: +1895: +1896: +1897: +1898: +1899: +1900: +1901: +1902: +1903: +1904: +1905: +1906: +1907: +1908: +1909: +1910: +1911: +1912: +1913: +1914: +1915: +1916: +1917: +1918: +1919: +1920: +1921: +1922: +1923: +1924: +1925: +1926: +1927: +1928: +1929: +1930: +1931: +1932: +1933: +1934: +1935: +1936: +1937: +1938: +1939: +1940: +1941: +1942: +1943: +1944: +1945: +1946: +1947: +1948: +1949: +1950: +1951: +1952: +1953: +1954: +1955: +1956: +1957: +1958: +1959: +1960: +1961: +1962: +1963: +1964: +1965: +1966: +1967: +1968: +1969: +1970: +1971: +1972: +1973: +1974: +1975: +1976: +1977: +1978: +1979: +1980: +1981: +1982: +1983: +1984: +1985: +1986: +1987: +1988: +1989: +1990: +1991: +1992: +1993: +1994: +1995: +1996: +1997: +1998: +1999: +2000: +2001: +2002: +2003: +2004: +2005: +2006: +2007: +2008: +2009: +2010: +2011: +2012: +2013: +2014: +2015: +2016: +2017: +2018: +2019: +2020: +2021: +2022: +2023: +2024: +2025: +2026: +2027: +2028: +2029: +2030: +2031: +2032: +2033: +2034: +2035: +2036: +2037: +2038: +2039: +2040: +2041: +2042: +2043: +2044: +2045: +2046: +2047: +2048: +2049: +2050: +2051: +2052: +2053: +2054: +2055: +2056: +2057: +2058: +2059: +2060: +2061: +2062: +2063: +2064: +2065: +2066: +2067: +2068: +2069: +2070: +2071: +2072: +2073: +2074: +2075: +2076: +2077: +2078: +2079: +2080: +2081: +2082: +2083: +2084: +2085: +2086: +2087: +2088: +2089: +2090: +2091: +2092: +2093: +2094: +2095: 
+2096: +2097: +2098: +2099: +2100: +2101: +2102: +2103: +2104: +2105: +2106: +2107: +2108: +2109: +2110: +2111: +2112: +2113: +2114: +2115: +2116: +2117: +2118: +2119: +2120: +2121: +2122: +2123: +2124: +2125: +2126: +2127: +2128: +2129: +2130: +2131: +2132: +2133: +2134: +2135: +2136: +2137: +2138: +2139: +2140: +2141: +2142: +2143: +2144: +2145: +2146: +2147: +2148: +2149: +2150: +2151: +2152: +2153: +2154: +2155: +2156: +2157: +2158: +2159: +2160: +2161: +2162: +2163: +2164: +2165: +2166: +2167: +2168: +2169: +2170: +2171: +2172: +2173: +2174: +2175: +2176: +2177: +2178: +2179: +2180: +2181: +2182: +2183: +2184: +2185: +2186: +2187: +2188: +2189: +2190: +2191: +2192: +2193: +2194: +2195: +2196: +2197: +2198: +2199: +2200: +2201: +2202: +2203: +2204: +2205: +2206: +2207: +2208: +2209: +2210: +2211: +2212: +2213: +2214: +2215: +2216: +2217: +2218: +2219: +2220: +2221: +2222: +2223: +2224: +2225: +2226: +2227: +2228: +2229: +2230: +2231: +2232: +2233: +2234: +2235: +2236: +2237: +2238: +2239: +2240: +2241: +2242: +2243: +2244: +2245: +2246: +2247: +2248: +2249: +2250: +2251: +2252: +2253: +2254: +2255: +2256: +2257: +2258: +2259: +2260: +2261: +2262: +2263: +2264: +2265: +2266: +2267: +2268: +2269: +2270: +2271: +2272: +2273: +2274: +2275: +2276: +2277: +2278: +2279: +2280: +2281: +2282: +2283: +2284: +2285: +2286: +2287: +2288: +2289: +2290: +2291: +2292: +2293: +2294: +2295: +2296: +2297: +2298: +2299: +2300: +2301: +2302: +2303: +2304: +2305: +2306: +2307: +2308: +2309: +2310: +2311: +2312: +2313: +2314: +2315: +2316: +2317: +2318: +2319: +2320: +2321: +2322: +2323: +2324: +2325: +2326: +2327: +2328: +2329: +2330: +2331: +2332: +2333: +2334: +2335: +2336: +2337: +2338: +2339: +2340: +2341: +2342: +2343: +2344: +2345: +2346: +2347: +2348: +2349: +2350: +2351: +2352: +2353: +2354: +2355: +2356: +2357: +2358: +2359: +2360: +2361: +2362: +2363: +2364: +2365: +2366: +2367: +2368: +2369: +2370: +2371: +2372: +2373: +2374: +2375: +2376: +2377: +2378: +2379: +2380: 
+2381: +2382: +2383: +2384: +2385: +2386: +2387: +2388: +2389: +2390: +2391: +2392: +2393: +2394: +2395: +2396: +2397: +2398: +2399: +2400: +2401: +2402: +2403: +2404: +2405: +2406: +2407: +2408: +2409: +2410: +2411: +2412: +2413: +2414: +2415: +2416: +2417: +2418: +2419: +2420: +2421: +2422: +2423: +2424: +2425: +2426: +2427: +2428: +2429: +2430: +2431: +2432: +2433: +2434: +2435: +2436: +2437: +2438: +2439: +2440: +2441: +2442: +2443: +2444: +2445: +2446: +2447: +2448: +2449: +2450: +2451: +2452: +2453: +2454: +2455: +2456: +2457: +2458: +2459: +2460: +2461: +2462: +2463: +2464: +2465: +2466: +2467: +2468: +2469: +2470: +2471: +2472: +2473: +2474: +2475: +2476: +2477: +2478: +2479: +2480: +2481: +2482: +2483: +2484: +2485: +2486: +2487: +2488: +2489: +2490: +2491: +2492: +2493: +2494: +2495: +2496: +2497: +2498: +2499: +2500: +2501: +2502: +2503: +2504: +2505: +2506: +2507: +2508: +2509: +2510: +2511: +2512: +2513: +2514: +2515: +2516: +2517: +2518: +2519: +2520: +2521: +2522: +2523: +2524: +2525: +2526: +2527: +2528: +2529: +2530: +2531: +2532: +2533: +2534: +2535: +2536: +2537: +2538: +2539: +2540: +2541: +2542: +2543: +2544: +2545: +2546: +2547: +2548: +2549: +2550: +2551: +2552: +2553: +2554: +2555: +2556: +2557: +2558: +2559: +2560: +2561: +2562: +2563: +2564: +2565: +2566: +2567: +2568: +2569: +2570: +2571: +2572: +2573: +2574: +2575: +2576: +2577: +2578: +2579: +2580: +2581: +2582: +2583: +2584: +2585: +2586: +2587: +2588: +2589: +2590: +2591: +2592: +2593: +2594: +2595: +2596: +2597: +2598: +2599: +2600: +2601: +2602: +2603: +2604: +2605: +2606: +2607: +2608: +2609: +2610: +2611: +2612: +2613: +2614: +2615: +2616: +2617: +2618: +2619: +2620: +2621: +2622: +2623: +2624: +2625: +2626: +2627: +2628: +2629: +2630: +2631: +2632: +2633: +2634: +2635: +2636: +2637: +2638: +2639: +2640: +2641: +2642: +2643: +2644: +2645: +2646: +2647: +2648: +2649: +2650: +2651: +2652: +2653: +2654: +2655: +2656: +2657: +2658: +2659: +2660: +2661: +2662: +2663: +2664: +2665: 
+2666: +2667: +2668: +2669: +2670: +2671: +2672: +2673: +2674: +2675: +2676: +2677: +2678: +2679: +2680: +2681: +2682: +2683: +2684: +2685: +2686: +2687: +2688: +2689: +2690: +2691: +2692: +2693: +2694: +2695: +2696: +2697: +2698: +2699: +2700: +2701: +2702: +2703: +2704: +2705: +2706: +2707: +2708: +2709: +2710: +2711: +2712: +2713: +2714: +2715: +2716: +2717: +2718: +2719: +2720: +2721: +2722: +2723: +2724: +2725: +2726: +2727: +2728: +2729: +2730: +2731: +2732: +2733: +2734: +2735: +2736: +2737: +2738: +2739: +2740: +2741: +2742: +2743: +2744: +2745: +2746: +2747: +2748: +2749: +2750: +2751: +2752: +2753: +2754: +2755: +2756: +2757: +2758: +2759: +2760: +2761: +2762: +2763: +2764: +2765: +2766: +2767: +2768: +2769: +2770: +2771: +2772: +2773: +2774: +2775: +2776: +2777: +2778: +2779: +2780: +2781: +2782: +2783: +2784: +2785: +2786: +2787: +2788: +2789: +2790: +2791: +2792: +2793: +2794: +2795: +2796: +2797: +2798: +2799: +2800: +2801: +2802: +2803: +2804: +2805: +2806: +2807: +2808: +2809: +2810: +2811: +2812: +2813: // Input RMSNorm +2814: +2815: +2816: +2817: +2818: +2819: +2820: +2821: +2822: +2823: +2824: +2825: +2826: +2827: +2828: +2829: +2830: +2831: +2832: +2833: +2834: +2835: +2836: +2837: +2838: +2839: +2840: +2841: +2842: +2843: +2844: +2845: +2846: +2847: +2848: +2849: +2850: +2851: +2852: +2853: +2854: +2855: +2856: +2857: +2858: +2859: +2860: +2861: +2862: +2863: +2864: +2865: +2866: +2867: +2868: +2869: +2870: +2871: RMSNorm +2872: +2873: +2874: +2875: +2876: +2877: +2878: +2879: +2880: +2881: +2882: +2883: +2884: +2885: +2886: +2887: +2888: +2889: +2890: +2891: +2892: // Output +2893: +2894: +2895: +2896: +2897: +2898: +2899: +2900: +2901: +2902: +2903: +2904: +2905: +2906: diff --git a/examples/BuddyLlama/subgraph.mlir b/examples/BuddyLlama/subgraph.mlir new file mode 100644 index 00000000..7fa3d2d7 --- /dev/null +++ b/examples/BuddyLlama/subgraph.mlir @@ -0,0 +1,4894 @@ +#map = affine_map<(d0, d1) -> (d0, d1)> +#map1 = affine_map<(d0, d1, d2) 
-> (d1)> +#map2 = affine_map<(d0, d1, d2) -> (d0, d2)> +#map3 = affine_map<(d0, d1, d2) -> (d0, d1)> +#map4 = affine_map<(d0, d1, d2) -> (d0, d1, d2)> +module { + func.func @subgraph0(%arg0: tensor<32000x4096xf32>, %arg1: tensor<1x40xi64>, %arg2: tensor<64xf32>, %arg3: tensor<4096xf32>, %arg4: tensor<4096x4096xf32>, %arg5: tensor<4096x4096xf32>, %arg6: tensor<4096x4096xf32>, %arg7: tensor<4096x4096xf32>, %arg8: tensor<4096xf32>, %arg9: tensor<11008x4096xf32>, %arg10: tensor<11008x4096xf32>, %arg11: tensor<4096x11008xf32>, %arg12: tensor<4096xf32>, %arg13: tensor<4096x4096xf32>, %arg14: tensor<4096x4096xf32>, %arg15: tensor<4096x4096xf32>, %arg16: tensor<4096x4096xf32>, %arg17: tensor<4096xf32>, %arg18: tensor<11008x4096xf32>, %arg19: tensor<11008x4096xf32>, %arg20: tensor<4096x11008xf32>, %arg21: tensor<4096xf32>, %arg22: tensor<4096x4096xf32>, %arg23: tensor<4096x4096xf32>, %arg24: tensor<4096x4096xf32>, %arg25: tensor<4096x4096xf32>, %arg26: tensor<4096xf32>, %arg27: tensor<11008x4096xf32>, %arg28: tensor<11008x4096xf32>, %arg29: tensor<4096x11008xf32>, %arg30: tensor<4096xf32>, %arg31: tensor<4096x4096xf32>, %arg32: tensor<4096x4096xf32>, %arg33: tensor<4096x4096xf32>, %arg34: tensor<4096x4096xf32>, %arg35: tensor<4096xf32>, %arg36: tensor<11008x4096xf32>, %arg37: tensor<11008x4096xf32>, %arg38: tensor<4096x11008xf32>, %arg39: tensor<4096xf32>, %arg40: tensor<4096x4096xf32>, %arg41: tensor<4096x4096xf32>, %arg42: tensor<4096x4096xf32>, %arg43: tensor<4096x4096xf32>, %arg44: tensor<4096xf32>, %arg45: tensor<11008x4096xf32>, %arg46: tensor<11008x4096xf32>, %arg47: tensor<4096x11008xf32>, %arg48: tensor<4096xf32>, %arg49: tensor<4096x4096xf32>, %arg50: tensor<4096x4096xf32>, %arg51: tensor<4096x4096xf32>, %arg52: tensor<4096x4096xf32>, %arg53: tensor<4096xf32>, %arg54: tensor<11008x4096xf32>, %arg55: tensor<11008x4096xf32>, %arg56: tensor<4096x11008xf32>, %arg57: tensor<4096xf32>, %arg58: tensor<4096x4096xf32>, %arg59: tensor<4096x4096xf32>, %arg60: 
tensor<4096x4096xf32>, %arg61: tensor<4096x4096xf32>, %arg62: tensor<4096xf32>, %arg63: tensor<11008x4096xf32>, %arg64: tensor<11008x4096xf32>, %arg65: tensor<4096x11008xf32>, %arg66: tensor<4096xf32>, %arg67: tensor<4096x4096xf32>, %arg68: tensor<4096x4096xf32>, %arg69: tensor<4096x4096xf32>, %arg70: tensor<4096x4096xf32>, %arg71: tensor<4096xf32>, %arg72: tensor<11008x4096xf32>, %arg73: tensor<11008x4096xf32>, %arg74: tensor<4096x11008xf32>, %arg75: tensor<4096xf32>, %arg76: tensor<4096x4096xf32>, %arg77: tensor<4096x4096xf32>, %arg78: tensor<4096x4096xf32>, %arg79: tensor<4096x4096xf32>, %arg80: tensor<4096xf32>, %arg81: tensor<11008x4096xf32>, %arg82: tensor<11008x4096xf32>, %arg83: tensor<4096x11008xf32>, %arg84: tensor<4096xf32>, %arg85: tensor<4096x4096xf32>, %arg86: tensor<4096x4096xf32>, %arg87: tensor<4096x4096xf32>, %arg88: tensor<4096x4096xf32>, %arg89: tensor<4096xf32>, %arg90: tensor<11008x4096xf32>, %arg91: tensor<11008x4096xf32>, %arg92: tensor<4096x11008xf32>, %arg93: tensor<4096xf32>, %arg94: tensor<4096x4096xf32>, %arg95: tensor<4096x4096xf32>, %arg96: tensor<4096x4096xf32>, %arg97: tensor<4096x4096xf32>, %arg98: tensor<4096xf32>, %arg99: tensor<11008x4096xf32>, %arg100: tensor<11008x4096xf32>, %arg101: tensor<4096x11008xf32>, %arg102: tensor<4096xf32>, %arg103: tensor<4096x4096xf32>, %arg104: tensor<4096x4096xf32>, %arg105: tensor<4096x4096xf32>, %arg106: tensor<4096x4096xf32>, %arg107: tensor<4096xf32>, %arg108: tensor<11008x4096xf32>, %arg109: tensor<11008x4096xf32>, %arg110: tensor<4096x11008xf32>, %arg111: tensor<4096xf32>, %arg112: tensor<4096x4096xf32>, %arg113: tensor<4096x4096xf32>, %arg114: tensor<4096x4096xf32>, %arg115: tensor<4096x4096xf32>, %arg116: tensor<4096xf32>, %arg117: tensor<11008x4096xf32>, %arg118: tensor<11008x4096xf32>, %arg119: tensor<4096x11008xf32>, %arg120: tensor<4096xf32>, %arg121: tensor<4096x4096xf32>, %arg122: tensor<4096x4096xf32>, %arg123: tensor<4096x4096xf32>, %arg124: tensor<4096x4096xf32>, %arg125: 
tensor<4096xf32>, %arg126: tensor<11008x4096xf32>, %arg127: tensor<11008x4096xf32>, %arg128: tensor<4096x11008xf32>, %arg129: tensor<4096xf32>, %arg130: tensor<4096x4096xf32>, %arg131: tensor<4096x4096xf32>, %arg132: tensor<4096x4096xf32>, %arg133: tensor<4096x4096xf32>, %arg134: tensor<4096xf32>, %arg135: tensor<11008x4096xf32>, %arg136: tensor<11008x4096xf32>, %arg137: tensor<4096x11008xf32>, %arg138: tensor<4096xf32>, %arg139: tensor<4096x4096xf32>, %arg140: tensor<4096x4096xf32>, %arg141: tensor<4096x4096xf32>, %arg142: tensor<4096x4096xf32>, %arg143: tensor<4096xf32>, %arg144: tensor<11008x4096xf32>, %arg145: tensor<11008x4096xf32>, %arg146: tensor<4096x11008xf32>, %arg147: tensor<4096xf32>, %arg148: tensor<4096x4096xf32>, %arg149: tensor<4096x4096xf32>, %arg150: tensor<4096x4096xf32>, %arg151: tensor<4096x4096xf32>, %arg152: tensor<4096xf32>, %arg153: tensor<11008x4096xf32>, %arg154: tensor<11008x4096xf32>, %arg155: tensor<4096x11008xf32>, %arg156: tensor<4096xf32>, %arg157: tensor<4096x4096xf32>, %arg158: tensor<4096x4096xf32>, %arg159: tensor<4096x4096xf32>, %arg160: tensor<4096x4096xf32>, %arg161: tensor<4096xf32>, %arg162: tensor<11008x4096xf32>, %arg163: tensor<11008x4096xf32>, %arg164: tensor<4096x11008xf32>, %arg165: tensor<4096xf32>, %arg166: tensor<4096x4096xf32>, %arg167: tensor<4096x4096xf32>, %arg168: tensor<4096x4096xf32>, %arg169: tensor<4096x4096xf32>, %arg170: tensor<4096xf32>, %arg171: tensor<11008x4096xf32>, %arg172: tensor<11008x4096xf32>, %arg173: tensor<4096x11008xf32>, %arg174: tensor<4096xf32>, %arg175: tensor<4096x4096xf32>, %arg176: tensor<4096x4096xf32>, %arg177: tensor<4096x4096xf32>, %arg178: tensor<4096x4096xf32>, %arg179: tensor<4096xf32>, %arg180: tensor<11008x4096xf32>, %arg181: tensor<11008x4096xf32>, %arg182: tensor<4096x11008xf32>, %arg183: tensor<4096xf32>, %arg184: tensor<4096x4096xf32>, %arg185: tensor<4096x4096xf32>, %arg186: tensor<4096x4096xf32>, %arg187: tensor<4096x4096xf32>, %arg188: tensor<4096xf32>, %arg189: 
tensor<11008x4096xf32>, %arg190: tensor<11008x4096xf32>, %arg191: tensor<4096x11008xf32>, %arg192: tensor<4096xf32>, %arg193: tensor<4096x4096xf32>, %arg194: tensor<4096x4096xf32>, %arg195: tensor<4096x4096xf32>, %arg196: tensor<4096x4096xf32>, %arg197: tensor<4096xf32>, %arg198: tensor<11008x4096xf32>, %arg199: tensor<11008x4096xf32>, %arg200: tensor<4096x11008xf32>, %arg201: tensor<4096xf32>, %arg202: tensor<4096x4096xf32>, %arg203: tensor<4096x4096xf32>, %arg204: tensor<4096x4096xf32>, %arg205: tensor<4096x4096xf32>, %arg206: tensor<4096xf32>, %arg207: tensor<11008x4096xf32>, %arg208: tensor<11008x4096xf32>, %arg209: tensor<4096x11008xf32>, %arg210: tensor<4096xf32>, %arg211: tensor<4096x4096xf32>, %arg212: tensor<4096x4096xf32>, %arg213: tensor<4096x4096xf32>, %arg214: tensor<4096x4096xf32>, %arg215: tensor<4096xf32>, %arg216: tensor<11008x4096xf32>, %arg217: tensor<11008x4096xf32>, %arg218: tensor<4096x11008xf32>, %arg219: tensor<4096xf32>, %arg220: tensor<4096x4096xf32>, %arg221: tensor<4096x4096xf32>, %arg222: tensor<4096x4096xf32>, %arg223: tensor<4096x4096xf32>, %arg224: tensor<4096xf32>, %arg225: tensor<11008x4096xf32>, %arg226: tensor<11008x4096xf32>, %arg227: tensor<4096x11008xf32>, %arg228: tensor<4096xf32>, %arg229: tensor<4096x4096xf32>, %arg230: tensor<4096x4096xf32>, %arg231: tensor<4096x4096xf32>, %arg232: tensor<4096x4096xf32>, %arg233: tensor<4096xf32>, %arg234: tensor<11008x4096xf32>, %arg235: tensor<11008x4096xf32>, %arg236: tensor<4096x11008xf32>, %arg237: tensor<4096xf32>, %arg238: tensor<4096x4096xf32>, %arg239: tensor<4096x4096xf32>, %arg240: tensor<4096x4096xf32>, %arg241: tensor<4096x4096xf32>, %arg242: tensor<4096xf32>, %arg243: tensor<11008x4096xf32>, %arg244: tensor<11008x4096xf32>, %arg245: tensor<4096x11008xf32>, %arg246: tensor<4096xf32>, %arg247: tensor<4096x4096xf32>, %arg248: tensor<4096x4096xf32>, %arg249: tensor<4096x4096xf32>, %arg250: tensor<4096x4096xf32>, %arg251: tensor<4096xf32>, %arg252: tensor<11008x4096xf32>, %arg253: 
tensor<11008x4096xf32>, %arg254: tensor<4096x11008xf32>, %arg255: tensor<4096xf32>, %arg256: tensor<4096x4096xf32>, %arg257: tensor<4096x4096xf32>, %arg258: tensor<4096x4096xf32>, %arg259: tensor<4096x4096xf32>, %arg260: tensor<4096xf32>, %arg261: tensor<11008x4096xf32>, %arg262: tensor<11008x4096xf32>, %arg263: tensor<4096x11008xf32>, %arg264: tensor<4096xf32>, %arg265: tensor<4096x4096xf32>, %arg266: tensor<4096x4096xf32>, %arg267: tensor<4096x4096xf32>, %arg268: tensor<4096x4096xf32>, %arg269: tensor<4096xf32>, %arg270: tensor<11008x4096xf32>, %arg271: tensor<11008x4096xf32>, %arg272: tensor<4096x11008xf32>, %arg273: tensor<4096xf32>, %arg274: tensor<4096x4096xf32>, %arg275: tensor<4096x4096xf32>, %arg276: tensor<4096x4096xf32>, %arg277: tensor<4096x4096xf32>, %arg278: tensor<4096xf32>, %arg279: tensor<11008x4096xf32>, %arg280: tensor<11008x4096xf32>, %arg281: tensor<4096x11008xf32>, %arg282: tensor<4096xf32>, %arg283: tensor<4096x4096xf32>, %arg284: tensor<4096x4096xf32>, %arg285: tensor<4096x4096xf32>, %arg286: tensor<4096x4096xf32>, %arg287: tensor<4096xf32>, %arg288: tensor<11008x4096xf32>, %arg289: tensor<11008x4096xf32>, %arg290: tensor<4096x11008xf32>, %arg291: tensor<4096xf32>, %arg292: tensor<32000x4096xf32>) -> tensor<1x40x32000xf32> { + %0 = tosa.cast %arg1 : (tensor<1x40xi64>) -> tensor<1x40xi32> + %1 = tosa.reshape %arg0 {new_shape = array} : (tensor<32000x4096xf32>) -> tensor<1x32000x4096xf32> + %2 = tosa.gather %1, %0 : (tensor<1x32000x4096xf32>, tensor<1x40xi32>) -> tensor<1x40x4096xf32> + %3 = tosa.reshape %2 {new_shape = array} : (tensor<1x40x4096xf32>) -> tensor<1x40x4096xf32> + %4 = "tosa.const"() <{value = dense<[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39]> : tensor<40xi64>}> : () -> tensor<40xi64> + %5 = tosa.reshape %4 {new_shape = array} : (tensor<40xi64>) -> tensor<1x40xi64> + %cst = arith.constant dense<-3.40282347E+38> : 
tensor<40x41xf32> + %6 = "tosa.const"() <{value = dense<[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40]> : tensor<41xi64>}> : () -> tensor<41xi64> + %7 = tosa.reshape %6 {new_shape = array} : (tensor<41xi64>) -> tensor<1x41xi64> + %8 = "tosa.const"() <{value = dense<[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39]> : tensor<40xi64>}> : () -> tensor<40xi64> + %9 = tosa.reshape %8 {new_shape = array} : (tensor<40xi64>) -> tensor<40x1xi64> + %10 = tosa.sub %7, %9 : (tensor<1x41xi64>, tensor<40x1xi64>) -> tensor<40x41xi64> + %c1_i64 = arith.constant 1 : i64 + %splat = tensor.splat %c1_i64 : tensor<40x41xi64> + %11 = arith.cmpi sge, %10, %splat : tensor<40x41xi64> + %cst_0 = arith.constant 0.000000e+00 : f32 + %12 = tensor.empty() : tensor<40x41xf32> + %splat_1 = tensor.splat %cst_0 : tensor<40x41xf32> + %13 = linalg.generic {indexing_maps = [#map, #map, #map, #map], iterator_types = ["parallel", "parallel"]} ins(%11, %cst, %splat_1 : tensor<40x41xi1>, tensor<40x41xf32>, tensor<40x41xf32>) outs(%12 : tensor<40x41xf32>) { + ^bb0(%in: i1, %in_873: f32, %in_874: f32, %out: f32): + %3745 = arith.select %in, %in_873, %in_874 : f32 + linalg.yield %3745 : f32 + } -> tensor<40x41xf32> + %14 = "tosa.const"() <{value = dense<[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40]> : tensor<41xi64>}> : () -> tensor<41xi64> + %15 = tosa.reshape %4 {new_shape = array} : (tensor<40xi64>) -> tensor<40x1xi64> + %16 = tensor.empty() : tensor<40x41xi1> + %17 = linalg.generic {indexing_maps = [#map1, #map2, #map3], iterator_types = ["parallel", "parallel", "reduction"]} ins(%14, %15 : tensor<41xi64>, tensor<40x1xi64>) outs(%16 : tensor<40x41xi1>) { + ^bb0(%in: i64, %in_873: i64, %out: 
i1): + %3745 = arith.cmpi sgt, %in, %in_873 : i64 + linalg.yield %3745 : i1 + } -> tensor<40x41xi1> + %18 = tosa.cast %17 : (tensor<40x41xi1>) -> tensor<40x41xf32> + %19 = tosa.mul %13, %18 {shift = 0 : i8} : (tensor<40x41xf32>, tensor<40x41xf32>) -> tensor<40x41xf32> // ******* + %20 = tosa.reshape %arg2 {new_shape = array} : (tensor<64xf32>) -> tensor<1x64xf32> + %extracted_slice = tensor.extract_slice %20[0, 0] [1, 64] [1, 1] : tensor<1x64xf32> to tensor<1x64xf32> + %21 = tosa.reshape %extracted_slice {new_shape = array} : (tensor<1x64xf32>) -> tensor<1x64x1xf32> + %22 = "tosa.const"() <{value = dense<0.000000e+00> : tensor<1x64x1xf32>}> : () -> tensor<1x64x1xf32> + %23 = tosa.add %21, %22 : (tensor<1x64x1xf32>, tensor<1x64x1xf32>) -> tensor<1x64x1xf32> + %extracted_slice_2 = tensor.extract_slice %5[0, 0] [1, 40] [1, 1] : tensor<1x40xi64> to tensor<1x40xi64> + %24 = tosa.reshape %extracted_slice_2 {new_shape = array} : (tensor<1x40xi64>) -> tensor<1x1x40xi64> + %extracted_slice_3 = tensor.extract_slice %24[0, 0, 0] [1, 1, 40] [1, 1, 1] : tensor<1x1x40xi64> to tensor<1x1x40xi64> + %25 = tosa.cast %extracted_slice_3 : (tensor<1x1x40xi64>) -> tensor<1x1x40xf32> + %26 = "tosa.const"() <{value = dense<0.000000e+00> : tensor<1x64x1xf32>}> : () -> tensor<1x64x1xf32> + %27 = tosa.add %23, %26 : (tensor<1x64x1xf32>, tensor<1x64x1xf32>) -> tensor<1x64x1xf32> + %28 = tosa.reshape %27 {new_shape = array} : (tensor<1x64x1xf32>) -> tensor<1x64x1xf32> + %29 = "tosa.const"() <{value = dense<0.000000e+00> : tensor<1x1x40xf32>}> : () -> tensor<1x1x40xf32> + %30 = tosa.add %25, %29 : (tensor<1x1x40xf32>, tensor<1x1x40xf32>) -> tensor<1x1x40xf32> + %31 = tosa.reshape %30 {new_shape = array} : (tensor<1x1x40xf32>) -> tensor<1x1x40xf32> + %32 = tosa.matmul %28, %31 : (tensor<1x64x1xf32>, tensor<1x1x40xf32>) -> tensor<1x64x40xf32> + %33 = tosa.reshape %32 {new_shape = array} : (tensor<1x64x40xf32>) -> tensor<1x64x40xf32> + %34 = "tosa.const"() <{value = dense<[0, 2, 1]> : 
tensor<3xi32>}> : () -> tensor<3xi32> + %35 = tosa.transpose %33, %34 : (tensor<1x64x40xf32>, tensor<3xi32>) -> tensor<1x40x64xf32> + %36 = tosa.reshape %35 {new_shape = array} : (tensor<1x40x64xf32>) -> tensor<1x40x1x64xf32> + %37 = "tosa.const"() <{value = dense<0.000000e+00> : tensor<1x40x2x64xf32>}> : () -> tensor<1x40x2x64xf32> + %38 = tosa.add %36, %37 : (tensor<1x40x1x64xf32>, tensor<1x40x2x64xf32>) -> tensor<1x40x2x64xf32> + %39 = tosa.identity %38 : (tensor<1x40x2x64xf32>) -> tensor<1x40x2x64xf32> + %40 = tosa.reshape %39 {new_shape = array} : (tensor<1x40x2x64xf32>) -> tensor<1x40x128xf32> + %41 = tosa.identity %40 : (tensor<1x40x128xf32>) -> tensor<1x40x128xf32> + %42 = math.cos %41 : tensor<1x40x128xf32> + %43 = math.sin %41 : tensor<1x40x128xf32> + %cst_4 = arith.constant dense<1.000000e+00> : tensor<1xf32> + %44 = tosa.reshape %cst_4 {new_shape = array} : (tensor<1xf32>) -> tensor<1x1x1xf32> + %45 = tosa.mul %42, %44 {shift = 0 : i8} : (tensor<1x40x128xf32>, tensor<1x1x1xf32>) -> tensor<1x40x128xf32> // *************** + %cst_5 = arith.constant dense<1.000000e+00> : tensor<1xf32> + %46 = tosa.reshape %cst_5 {new_shape = array} : (tensor<1xf32>) -> tensor<1x1x1xf32> + %47 = tosa.mul %43, %46 {shift = 0 : i8} : (tensor<1x40x128xf32>, tensor<1x1x1xf32>) -> tensor<1x40x128xf32> // *************** + %48 = tensor.empty() : tensor<1x40x4096xf32> + %c2_i32 = arith.constant 2 : i32 + %49 = linalg.generic {indexing_maps = [#map4, #map4], iterator_types = ["parallel", "parallel", "parallel"]} ins(%3 : tensor<1x40x4096xf32>) outs(%48 : tensor<1x40x4096xf32>) { + ^bb0(%in: f32, %out: f32): + %3745 = math.fpowi %in, %c2_i32 : f32, i32 + linalg.yield %3745 : f32 + } -> tensor<1x40x4096xf32> + %50 = tosa.reduce_sum %49 {axis = 2 : i32} : (tensor<1x40x4096xf32>) -> tensor<1x40x1xf32> + %51 = "tosa.const"() <{value = dense<4.096000e+03> : tensor<1xf32>}> : () -> tensor<1xf32> + %52 = tosa.reciprocal %51 : (tensor<1xf32>) -> tensor<1xf32> + %53 = tosa.mul %52, %50 
{shift = 0 : i8} : (tensor<1xf32>, tensor<1x40x1xf32>) -> tensor<1x40x1xf32> + %54 = "tosa.const"() <{value = dense<9.99999974E-6> : tensor<1x40x1xf32>}> : () -> tensor<1x40x1xf32> + %55 = tosa.add %53, %54 : (tensor<1x40x1xf32>, tensor<1x40x1xf32>) -> tensor<1x40x1xf32> + %56 = tosa.rsqrt %55 : (tensor<1x40x1xf32>) -> tensor<1x40x1xf32> + %57 = tosa.mul %3, %56 {shift = 0 : i8} : (tensor<1x40x4096xf32>, tensor<1x40x1xf32>) -> tensor<1x40x4096xf32> + %58 = tosa.reshape %arg3 {new_shape = array} : (tensor<4096xf32>) -> tensor<1x1x4096xf32> + %59 = tosa.mul %58, %57 {shift = 0 : i8} : (tensor<1x1x4096xf32>, tensor<1x40x4096xf32>) -> tensor<1x40x4096xf32> + %60 = "tosa.const"() <{value = dense<[1, 0]> : tensor<2xi32>}> : () -> tensor<2xi32> + %61 = tosa.transpose %arg4, %60 : (tensor<4096x4096xf32>, tensor<2xi32>) -> tensor<4096x4096xf32> + %62 = tosa.reshape %59 {new_shape = array} : (tensor<1x40x4096xf32>) -> tensor<40x4096xf32> + %cst_6 = arith.constant dense<0.000000e+00> : tensor<40x4096xf32> + %63 = linalg.matmul {cast = #linalg.type_fn} ins(%62, %61 : tensor<40x4096xf32>, tensor<4096x4096xf32>) outs(%cst_6 : tensor<40x4096xf32>) -> tensor<40x4096xf32> + %64 = tosa.reshape %63 {new_shape = array} : (tensor<40x4096xf32>) -> tensor<1x40x4096xf32> + %65 = "tosa.const"() <{value = dense<[1, 0]> : tensor<2xi32>}> : () -> tensor<2xi32> + %66 = tosa.transpose %arg5, %65 : (tensor<4096x4096xf32>, tensor<2xi32>) -> tensor<4096x4096xf32> + %67 = tosa.reshape %59 {new_shape = array} : (tensor<1x40x4096xf32>) -> tensor<40x4096xf32> + %cst_7 = arith.constant dense<0.000000e+00> : tensor<40x4096xf32> + %68 = linalg.matmul {cast = #linalg.type_fn} ins(%67, %66 : tensor<40x4096xf32>, tensor<4096x4096xf32>) outs(%cst_7 : tensor<40x4096xf32>) -> tensor<40x4096xf32> + %69 = tosa.reshape %68 {new_shape = array} : (tensor<40x4096xf32>) -> tensor<1x40x4096xf32> + %70 = "tosa.const"() <{value = dense<[1, 0]> : tensor<2xi32>}> : () -> tensor<2xi32> + %71 = tosa.transpose %arg6, %70 : 
(tensor<4096x4096xf32>, tensor<2xi32>) -> tensor<4096x4096xf32> + %72 = tosa.reshape %59 {new_shape = array} : (tensor<1x40x4096xf32>) -> tensor<40x4096xf32> + %cst_8 = arith.constant dense<0.000000e+00> : tensor<40x4096xf32> + %73 = linalg.matmul {cast = #linalg.type_fn} ins(%72, %71 : tensor<40x4096xf32>, tensor<4096x4096xf32>) outs(%cst_8 : tensor<40x4096xf32>) -> tensor<40x4096xf32> + %74 = tosa.reshape %73 {new_shape = array} : (tensor<40x4096xf32>) -> tensor<1x40x4096xf32> + %75 = tosa.reshape %64 {new_shape = array} : (tensor<1x40x4096xf32>) -> tensor<1x40x32x128xf32> + %76 = "tosa.const"() <{value = dense<[0, 2, 1, 3]> : tensor<4xi32>}> : () -> tensor<4xi32> + %77 = tosa.transpose %75, %76 : (tensor<1x40x32x128xf32>, tensor<4xi32>) -> tensor<1x32x40x128xf32> + %78 = tosa.reshape %69 {new_shape = array} : (tensor<1x40x4096xf32>) -> tensor<1x40x32x128xf32> + %79 = "tosa.const"() <{value = dense<[0, 2, 1, 3]> : tensor<4xi32>}> : () -> tensor<4xi32> + %80 = tosa.transpose %78, %79 : (tensor<1x40x32x128xf32>, tensor<4xi32>) -> tensor<1x32x40x128xf32> + %81 = tosa.reshape %74 {new_shape = array} : (tensor<1x40x4096xf32>) -> tensor<1x40x32x128xf32> + %82 = "tosa.const"() <{value = dense<[0, 2, 1, 3]> : tensor<4xi32>}> : () -> tensor<4xi32> + %83 = tosa.transpose %81, %82 : (tensor<1x40x32x128xf32>, tensor<4xi32>) -> tensor<1x32x40x128xf32> + %84 = tosa.reshape %45 {new_shape = array} : (tensor<1x40x128xf32>) -> tensor<1x1x40x128xf32> + %85 = tosa.reshape %47 {new_shape = array} : (tensor<1x40x128xf32>) -> tensor<1x1x40x128xf32> + %86 = tosa.mul %77, %84 {shift = 0 : i8} : (tensor<1x32x40x128xf32>, tensor<1x1x40x128xf32>) -> tensor<1x32x40x128xf32> + %extracted_slice_9 = tensor.extract_slice %77[0, 0, 0, 0] [1, 32, 40, 64] [1, 1, 1, 1] : tensor<1x32x40x128xf32> to tensor<1x32x40x64xf32> + %extracted_slice_10 = tensor.extract_slice %77[0, 0, 0, 64] [1, 32, 40, 64] [1, 1, 1, 1] : tensor<1x32x40x128xf32> to tensor<1x32x40x64xf32> + %87 = tensor.empty() : 
tensor<1x32x40x64xf32> + %88 = linalg.negf ins(%extracted_slice_10 : tensor<1x32x40x64xf32>) outs(%87 : tensor<1x32x40x64xf32>) -> tensor<1x32x40x64xf32> + %89 = tensor.empty() : tensor<1x32x40x128xf32> + %inserted_slice = tensor.insert_slice %88 into %89[0, 0, 0, 0] [1, 32, 40, 64] [1, 1, 1, 1] : tensor<1x32x40x64xf32> into tensor<1x32x40x128xf32> + %inserted_slice_11 = tensor.insert_slice %extracted_slice_9 into %inserted_slice[0, 0, 0, 64] [1, 32, 40, 64] [1, 1, 1, 1] : tensor<1x32x40x64xf32> into tensor<1x32x40x128xf32> + %90 = tosa.mul %inserted_slice_11, %85 {shift = 0 : i8} : (tensor<1x32x40x128xf32>, tensor<1x1x40x128xf32>) -> tensor<1x32x40x128xf32> + %91 = tosa.add %86, %90 : (tensor<1x32x40x128xf32>, tensor<1x32x40x128xf32>) -> tensor<1x32x40x128xf32> + %92 = tosa.mul %80, %84 {shift = 0 : i8} : (tensor<1x32x40x128xf32>, tensor<1x1x40x128xf32>) -> tensor<1x32x40x128xf32> + %extracted_slice_12 = tensor.extract_slice %80[0, 0, 0, 0] [1, 32, 40, 64] [1, 1, 1, 1] : tensor<1x32x40x128xf32> to tensor<1x32x40x64xf32> + %extracted_slice_13 = tensor.extract_slice %80[0, 0, 0, 64] [1, 32, 40, 64] [1, 1, 1, 1] : tensor<1x32x40x128xf32> to tensor<1x32x40x64xf32> + %93 = tensor.empty() : tensor<1x32x40x64xf32> + %94 = linalg.negf ins(%extracted_slice_13 : tensor<1x32x40x64xf32>) outs(%93 : tensor<1x32x40x64xf32>) -> tensor<1x32x40x64xf32> + %95 = tensor.empty() : tensor<1x32x40x128xf32> + %inserted_slice_14 = tensor.insert_slice %94 into %95[0, 0, 0, 0] [1, 32, 40, 64] [1, 1, 1, 1] : tensor<1x32x40x64xf32> into tensor<1x32x40x128xf32> + %inserted_slice_15 = tensor.insert_slice %extracted_slice_12 into %inserted_slice_14[0, 0, 0, 64] [1, 32, 40, 64] [1, 1, 1, 1] : tensor<1x32x40x64xf32> into tensor<1x32x40x128xf32> + %96 = tosa.mul %inserted_slice_15, %85 {shift = 0 : i8} : (tensor<1x32x40x128xf32>, tensor<1x1x40x128xf32>) -> tensor<1x32x40x128xf32> + %97 = tosa.add %92, %96 : (tensor<1x32x40x128xf32>, tensor<1x32x40x128xf32>) -> tensor<1x32x40x128xf32> + %98 = 
tosa.reshape %19 {new_shape = array} : (tensor<40x41xf32>) -> tensor<1x40x41xf32> + %99 = tosa.reshape %98 {new_shape = array} : (tensor<1x40x41xf32>) -> tensor<1x1x40x41xf32> + %extracted_slice_16 = tensor.extract_slice %99[0, 0, 0, 0] [1, 1, 40, 41] [1, 1, 1, 1] : tensor<1x1x40x41xf32> to tensor<1x1x40x41xf32> + %extracted_slice_17 = tensor.extract_slice %extracted_slice_16[0, 0, 0, 0] [1, 1, 40, 41] [1, 1, 1, 1] : tensor<1x1x40x41xf32> to tensor<1x1x40x41xf32> + %100 = "tosa.const"() <{value = dense<0.000000e+00> : tensor<1x1x40x41xf32>}> : () -> tensor<1x1x40x41xf32> + %101 = tosa.add %extracted_slice_17, %100 : (tensor<1x1x40x41xf32>, tensor<1x1x40x41xf32>) -> tensor<1x1x40x41xf32> + %extracted_slice_18 = tensor.extract_slice %101[0, 0, 0, 0] [1, 1, 40, 41] [1, 1, 1, 1] : tensor<1x1x40x41xf32> to tensor<1x1x40x41xf32> + %extracted_slice_19 = tensor.extract_slice %extracted_slice_18[0, 0, 0, 0] [1, 1, 40, 41] [1, 1, 1, 1] : tensor<1x1x40x41xf32> to tensor<1x1x40x41xf32> + %extracted_slice_20 = tensor.extract_slice %extracted_slice_19[0, 0, 0, 0] [1, 1, 40, 41] [1, 1, 1, 1] : tensor<1x1x40x41xf32> to tensor<1x1x40x41xf32> + %extracted_slice_21 = tensor.extract_slice %extracted_slice_20[0, 0, 0, 0] [1, 1, 40, 40] [1, 1, 1, 1] : tensor<1x1x40x41xf32> to tensor<1x1x40x40xf32> + %cst_22 = arith.constant 0.000000e+00 : f32 + %splat_23 = tensor.splat %cst_22 : tensor<40x40xf32> + %102 = tosa.reshape %extracted_slice_21 {new_shape = array} : (tensor<1x1x40x40xf32>) -> tensor<40x40xf32> + %103 = tosa.add %splat_23, %102 : (tensor<40x40xf32>, tensor<40x40xf32>) -> tensor<40x40xf32> + %104 = "tosa.const"() <{value = dense<[0, 1, 3, 2]> : tensor<4xi32>}> : () -> tensor<4xi32> + %105 = tosa.transpose %97, %104 : (tensor<1x32x40x128xf32>, tensor<4xi32>) -> tensor<1x32x128x40xf32> + %106 = tosa.reshape %91 {new_shape = array} : (tensor<1x32x40x128xf32>) -> tensor<32x40x128xf32> + %107 = tosa.reshape %105 {new_shape = array} : (tensor<1x32x128x40xf32>) -> tensor<32x128x40xf32> 
+ %108 = tosa.matmul %106, %107 : (tensor<32x40x128xf32>, tensor<32x128x40xf32>) -> tensor<32x40x40xf32> + %cst_24 = arith.constant 0.0883883461 : f32 + %splat_25 = tensor.splat %cst_24 : tensor<32x40x40xf32> + %109 = tosa.mul %108, %splat_25 {shift = 0 : i8} : (tensor<32x40x40xf32>, tensor<32x40x40xf32>) -> tensor<32x40x40xf32> + %110 = tosa.add %109, %103 : (tensor<32x40x40xf32>, tensor<40x40xf32>) -> tensor<32x40x40xf32> + %111 = tosa.reduce_max %110 {axis = 2 : i32} : (tensor<32x40x40xf32>) -> tensor<32x40x1xf32> + %112 = tosa.sub %110, %111 : (tensor<32x40x40xf32>, tensor<32x40x1xf32>) -> tensor<32x40x40xf32> + %113 = math.exp %112 : tensor<32x40x40xf32> + %114 = tosa.reduce_sum %113 {axis = 2 : i32} : (tensor<32x40x40xf32>) -> tensor<32x40x1xf32> + %115 = tosa.log %114 : (tensor<32x40x1xf32>) -> tensor<32x40x1xf32> + %116 = tosa.add %111, %115 : (tensor<32x40x1xf32>, tensor<32x40x1xf32>) -> tensor<32x40x1xf32> + %117 = tosa.sub %110, %116 : (tensor<32x40x40xf32>, tensor<32x40x1xf32>) -> tensor<32x40x40xf32> + %118 = math.exp %117 : tensor<32x40x40xf32> + %119 = tosa.reshape %116 {new_shape = array} : (tensor<32x40x1xf32>) -> tensor<1x32x40xf32> + %120 = tosa.reshape %83 {new_shape = array} : (tensor<1x32x40x128xf32>) -> tensor<32x40x128xf32> + %121 = tosa.matmul %118, %120 : (tensor<32x40x40xf32>, tensor<32x40x128xf32>) -> tensor<32x40x128xf32> + %130 = tosa.reshape %129 {new_shape = array} : (tensor<40x4096xf32>) -> tensor<1x40x4096xf32> + %131 = tosa.add %3, %130 : (tensor<1x40x4096xf32>, tensor<1x40x4096xf32>) -> tensor<1x40x4096xf32> + %132 = tensor.empty() : tensor<1x40x4096xf32> + %c2_i32_27 = arith.constant 2 : i32 + %133 = linalg.generic {indexing_maps = [#map4, #map4], iterator_types = ["parallel", "parallel", "parallel"]} ins(%131 : tensor<1x40x4096xf32>) outs(%132 : tensor<1x40x4096xf32>) { + ^bb0(%in: f32, %out: f32): + %3745 = math.fpowi %in, %c2_i32_27 : f32, i32 + linalg.yield %3745 : f32 + } -> tensor<1x40x4096xf32> + %134 = tosa.reduce_sum 
%133 {axis = 2 : i32} : (tensor<1x40x4096xf32>) -> tensor<1x40x1xf32> + %135 = "tosa.const"() <{value = dense<4.096000e+03> : tensor<1xf32>}> : () -> tensor<1xf32> + %136 = tosa.reciprocal %135 : (tensor<1xf32>) -> tensor<1xf32> + %137 = tosa.mul %136, %134 {shift = 0 : i8} : (tensor<1xf32>, tensor<1x40x1xf32>) -> tensor<1x40x1xf32> + %138 = "tosa.const"() <{value = dense<9.99999974E-6> : tensor<1x40x1xf32>}> : () -> tensor<1x40x1xf32> + %139 = tosa.add %137, %138 : (tensor<1x40x1xf32>, tensor<1x40x1xf32>) -> tensor<1x40x1xf32> + %140 = tosa.rsqrt %139 : (tensor<1x40x1xf32>) -> tensor<1x40x1xf32> + %141 = tosa.mul %131, %140 {shift = 0 : i8} : (tensor<1x40x4096xf32>, tensor<1x40x1xf32>) -> tensor<1x40x4096xf32> + %142 = tosa.reshape %arg8 {new_shape = array} : (tensor<4096xf32>) -> tensor<1x1x4096xf32> + %143 = tosa.mul %142, %141 {shift = 0 : i8} : (tensor<1x1x4096xf32>, tensor<1x40x4096xf32>) -> tensor<1x40x4096xf32> + %144 = "tosa.const"() <{value = dense<[1, 0]> : tensor<2xi32>}> : () -> tensor<2xi32> + %145 = tosa.transpose %arg9, %144 : (tensor<11008x4096xf32>, tensor<2xi32>) -> tensor<4096x11008xf32> + %146 = tosa.reshape %143 {new_shape = array} : (tensor<1x40x4096xf32>) -> tensor<40x4096xf32> + %cst_28 = arith.constant dense<0.000000e+00> : tensor<40x11008xf32> + %147 = linalg.matmul {cast = #linalg.type_fn} ins(%146, %145 : tensor<40x4096xf32>, tensor<4096x11008xf32>) outs(%cst_28 : tensor<40x11008xf32>) -> tensor<40x11008xf32> + %148 = tosa.reshape %147 {new_shape = array} : (tensor<40x11008xf32>) -> tensor<1x40x11008xf32> + %149 = tosa.sigmoid %148 : (tensor<1x40x11008xf32>) -> tensor<1x40x11008xf32> + %150 = tosa.mul %148, %149 {shift = 0 : i8} : (tensor<1x40x11008xf32>, tensor<1x40x11008xf32>) -> tensor<1x40x11008xf32> + %151 = "tosa.const"() <{value = dense<[1, 0]> : tensor<2xi32>}> : () -> tensor<2xi32> + %152 = tosa.transpose %arg10, %151 : (tensor<11008x4096xf32>, tensor<2xi32>) -> tensor<4096x11008xf32> + %153 = tosa.reshape %143 {new_shape = 
array} : (tensor<1x40x4096xf32>) -> tensor<40x4096xf32> + %cst_29 = arith.constant dense<0.000000e+00> : tensor<40x11008xf32> + %154 = linalg.matmul {cast = #linalg.type_fn} ins(%153, %152 : tensor<40x4096xf32>, tensor<4096x11008xf32>) outs(%cst_29 : tensor<40x11008xf32>) -> tensor<40x11008xf32> + %155 = tosa.reshape %154 {new_shape = array} : (tensor<40x11008xf32>) -> tensor<1x40x11008xf32> + %156 = tosa.mul %150, %155 {shift = 0 : i8} : (tensor<1x40x11008xf32>, tensor<1x40x11008xf32>) -> tensor<1x40x11008xf32> + %157 = "tosa.const"() <{value = dense<[1, 0]> : tensor<2xi32>}> : () -> tensor<2xi32> + %158 = tosa.transpose %arg11, %157 : (tensor<4096x11008xf32>, tensor<2xi32>) -> tensor<11008x4096xf32> + %159 = tosa.reshape %156 {new_shape = array} : (tensor<1x40x11008xf32>) -> tensor<40x11008xf32> + %cst_30 = arith.constant dense<0.000000e+00> : tensor<40x4096xf32> + %160 = linalg.matmul {cast = #linalg.type_fn} ins(%159, %158 : tensor<40x11008xf32>, tensor<11008x4096xf32>) outs(%cst_30 : tensor<40x4096xf32>) -> tensor<40x4096xf32> + %161 = tosa.reshape %160 {new_shape = array} : (tensor<40x4096xf32>) -> tensor<1x40x4096xf32> + %162 = tosa.add %131, %161 : (tensor<1x40x4096xf32>, tensor<1x40x4096xf32>) -> tensor<1x40x4096xf32> + %163 = tensor.empty() : tensor<1x40x4096xf32> + %c2_i32_31 = arith.constant 2 : i32 + %164 = linalg.generic {indexing_maps = [#map4, #map4], iterator_types = ["parallel", "parallel", "parallel"]} ins(%162 : tensor<1x40x4096xf32>) outs(%163 : tensor<1x40x4096xf32>) { + ^bb0(%in: f32, %out: f32): + %3745 = math.fpowi %in, %c2_i32_31 : f32, i32 + linalg.yield %3745 : f32 + } -> tensor<1x40x4096xf32> + %165 = tosa.reduce_sum %164 {axis = 2 : i32} : (tensor<1x40x4096xf32>) -> tensor<1x40x1xf32> + %166 = "tosa.const"() <{value = dense<4.096000e+03> : tensor<1xf32>}> : () -> tensor<1xf32> + %167 = tosa.reciprocal %166 : (tensor<1xf32>) -> tensor<1xf32> + %168 = tosa.mul %167, %165 {shift = 0 : i8} : (tensor<1xf32>, tensor<1x40x1xf32>) -> 
tensor<1x40x1xf32> + %169 = "tosa.const"() <{value = dense<9.99999974E-6> : tensor<1x40x1xf32>}> : () -> tensor<1x40x1xf32> + %170 = tosa.add %168, %169 : (tensor<1x40x1xf32>, tensor<1x40x1xf32>) -> tensor<1x40x1xf32> + %171 = tosa.rsqrt %170 : (tensor<1x40x1xf32>) -> tensor<1x40x1xf32> + %172 = tosa.mul %162, %171 {shift = 0 : i8} : (tensor<1x40x4096xf32>, tensor<1x40x1xf32>) -> tensor<1x40x4096xf32> + %173 = tosa.reshape %arg12 {new_shape = array} : (tensor<4096xf32>) -> tensor<1x1x4096xf32> + %174 = tosa.mul %173, %172 {shift = 0 : i8} : (tensor<1x1x4096xf32>, tensor<1x40x4096xf32>) -> tensor<1x40x4096xf32> + %175 = "tosa.const"() <{value = dense<[1, 0]> : tensor<2xi32>}> : () -> tensor<2xi32> + %176 = tosa.transpose %arg13, %175 : (tensor<4096x4096xf32>, tensor<2xi32>) -> tensor<4096x4096xf32> + %177 = tosa.reshape %174 {new_shape = array} : (tensor<1x40x4096xf32>) -> tensor<40x4096xf32> + %cst_32 = arith.constant dense<0.000000e+00> : tensor<40x4096xf32> + %178 = linalg.matmul {cast = #linalg.type_fn} ins(%177, %176 : tensor<40x4096xf32>, tensor<4096x4096xf32>) outs(%cst_32 : tensor<40x4096xf32>) -> tensor<40x4096xf32> + %179 = tosa.reshape %178 {new_shape = array} : (tensor<40x4096xf32>) -> tensor<1x40x4096xf32> + %180 = "tosa.const"() <{value = dense<[1, 0]> : tensor<2xi32>}> : () -> tensor<2xi32> + %181 = tosa.transpose %arg14, %180 : (tensor<4096x4096xf32>, tensor<2xi32>) -> tensor<4096x4096xf32> + %182 = tosa.reshape %174 {new_shape = array} : (tensor<1x40x4096xf32>) -> tensor<40x4096xf32> + %cst_33 = arith.constant dense<0.000000e+00> : tensor<40x4096xf32> + %183 = linalg.matmul {cast = #linalg.type_fn} ins(%182, %181 : tensor<40x4096xf32>, tensor<4096x4096xf32>) outs(%cst_33 : tensor<40x4096xf32>) -> tensor<40x4096xf32> + %184 = tosa.reshape %183 {new_shape = array} : (tensor<40x4096xf32>) -> tensor<1x40x4096xf32> + %185 = "tosa.const"() <{value = dense<[1, 0]> : tensor<2xi32>}> : () -> tensor<2xi32> + %186 = tosa.transpose %arg15, %185 : 
(tensor<4096x4096xf32>, tensor<2xi32>) -> tensor<4096x4096xf32> + %187 = tosa.reshape %174 {new_shape = array} : (tensor<1x40x4096xf32>) -> tensor<40x4096xf32> + %cst_34 = arith.constant dense<0.000000e+00> : tensor<40x4096xf32> + %188 = linalg.matmul {cast = #linalg.type_fn} ins(%187, %186 : tensor<40x4096xf32>, tensor<4096x4096xf32>) outs(%cst_34 : tensor<40x4096xf32>) -> tensor<40x4096xf32> + %189 = tosa.reshape %188 {new_shape = array} : (tensor<40x4096xf32>) -> tensor<1x40x4096xf32> + %190 = tosa.reshape %179 {new_shape = array} : (tensor<1x40x4096xf32>) -> tensor<1x40x32x128xf32> + %191 = "tosa.const"() <{value = dense<[0, 2, 1, 3]> : tensor<4xi32>}> : () -> tensor<4xi32> + %192 = tosa.transpose %190, %191 : (tensor<1x40x32x128xf32>, tensor<4xi32>) -> tensor<1x32x40x128xf32> + %193 = tosa.reshape %184 {new_shape = array} : (tensor<1x40x4096xf32>) -> tensor<1x40x32x128xf32> + %194 = "tosa.const"() <{value = dense<[0, 2, 1, 3]> : tensor<4xi32>}> : () -> tensor<4xi32> + %195 = tosa.transpose %193, %194 : (tensor<1x40x32x128xf32>, tensor<4xi32>) -> tensor<1x32x40x128xf32> + %196 = tosa.reshape %189 {new_shape = array} : (tensor<1x40x4096xf32>) -> tensor<1x40x32x128xf32> + %197 = "tosa.const"() <{value = dense<[0, 2, 1, 3]> : tensor<4xi32>}> : () -> tensor<4xi32> + %198 = tosa.transpose %196, %197 : (tensor<1x40x32x128xf32>, tensor<4xi32>) -> tensor<1x32x40x128xf32> + %199 = tosa.reshape %45 {new_shape = array} : (tensor<1x40x128xf32>) -> tensor<1x1x40x128xf32> + %200 = tosa.reshape %47 {new_shape = array} : (tensor<1x40x128xf32>) -> tensor<1x1x40x128xf32> + %201 = tosa.mul %192, %199 {shift = 0 : i8} : (tensor<1x32x40x128xf32>, tensor<1x1x40x128xf32>) -> tensor<1x32x40x128xf32> + %extracted_slice_35 = tensor.extract_slice %192[0, 0, 0, 0] [1, 32, 40, 64] [1, 1, 1, 1] : tensor<1x32x40x128xf32> to tensor<1x32x40x64xf32> + %extracted_slice_36 = tensor.extract_slice %192[0, 0, 0, 64] [1, 32, 40, 64] [1, 1, 1, 1] : tensor<1x32x40x128xf32> to tensor<1x32x40x64xf32> + 
%202 = tensor.empty() : tensor<1x32x40x64xf32> + %203 = linalg.negf ins(%extracted_slice_36 : tensor<1x32x40x64xf32>) outs(%202 : tensor<1x32x40x64xf32>) -> tensor<1x32x40x64xf32> + %204 = tensor.empty() : tensor<1x32x40x128xf32> + %inserted_slice_37 = tensor.insert_slice %203 into %204[0, 0, 0, 0] [1, 32, 40, 64] [1, 1, 1, 1] : tensor<1x32x40x64xf32> into tensor<1x32x40x128xf32> + %inserted_slice_38 = tensor.insert_slice %extracted_slice_35 into %inserted_slice_37[0, 0, 0, 64] [1, 32, 40, 64] [1, 1, 1, 1] : tensor<1x32x40x64xf32> into tensor<1x32x40x128xf32> + %205 = tosa.mul %inserted_slice_38, %200 {shift = 0 : i8} : (tensor<1x32x40x128xf32>, tensor<1x1x40x128xf32>) -> tensor<1x32x40x128xf32> + %206 = tosa.add %201, %205 : (tensor<1x32x40x128xf32>, tensor<1x32x40x128xf32>) -> tensor<1x32x40x128xf32> + %207 = tosa.mul %195, %199 {shift = 0 : i8} : (tensor<1x32x40x128xf32>, tensor<1x1x40x128xf32>) -> tensor<1x32x40x128xf32> + %extracted_slice_39 = tensor.extract_slice %195[0, 0, 0, 0] [1, 32, 40, 64] [1, 1, 1, 1] : tensor<1x32x40x128xf32> to tensor<1x32x40x64xf32> + %extracted_slice_40 = tensor.extract_slice %195[0, 0, 0, 64] [1, 32, 40, 64] [1, 1, 1, 1] : tensor<1x32x40x128xf32> to tensor<1x32x40x64xf32> + %208 = tensor.empty() : tensor<1x32x40x64xf32> + %209 = linalg.negf ins(%extracted_slice_40 : tensor<1x32x40x64xf32>) outs(%208 : tensor<1x32x40x64xf32>) -> tensor<1x32x40x64xf32> + %210 = tensor.empty() : tensor<1x32x40x128xf32> + %inserted_slice_41 = tensor.insert_slice %209 into %210[0, 0, 0, 0] [1, 32, 40, 64] [1, 1, 1, 1] : tensor<1x32x40x64xf32> into tensor<1x32x40x128xf32> + %inserted_slice_42 = tensor.insert_slice %extracted_slice_39 into %inserted_slice_41[0, 0, 0, 64] [1, 32, 40, 64] [1, 1, 1, 1] : tensor<1x32x40x64xf32> into tensor<1x32x40x128xf32> + %211 = tosa.mul %inserted_slice_42, %200 {shift = 0 : i8} : (tensor<1x32x40x128xf32>, tensor<1x1x40x128xf32>) -> tensor<1x32x40x128xf32> + %212 = tosa.add %207, %211 : (tensor<1x32x40x128xf32>, 
tensor<1x32x40x128xf32>) -> tensor<1x32x40x128xf32> + %213 = tosa.reshape %19 {new_shape = array} : (tensor<40x41xf32>) -> tensor<1x40x41xf32> + %214 = tosa.reshape %213 {new_shape = array} : (tensor<1x40x41xf32>) -> tensor<1x1x40x41xf32> + %extracted_slice_43 = tensor.extract_slice %214[0, 0, 0, 0] [1, 1, 40, 41] [1, 1, 1, 1] : tensor<1x1x40x41xf32> to tensor<1x1x40x41xf32> + %extracted_slice_44 = tensor.extract_slice %extracted_slice_43[0, 0, 0, 0] [1, 1, 40, 41] [1, 1, 1, 1] : tensor<1x1x40x41xf32> to tensor<1x1x40x41xf32> + %215 = "tosa.const"() <{value = dense<0.000000e+00> : tensor<1x1x40x41xf32>}> : () -> tensor<1x1x40x41xf32> + %216 = tosa.add %extracted_slice_44, %215 : (tensor<1x1x40x41xf32>, tensor<1x1x40x41xf32>) -> tensor<1x1x40x41xf32> + %extracted_slice_45 = tensor.extract_slice %216[0, 0, 0, 0] [1, 1, 40, 41] [1, 1, 1, 1] : tensor<1x1x40x41xf32> to tensor<1x1x40x41xf32> + %extracted_slice_46 = tensor.extract_slice %extracted_slice_45[0, 0, 0, 0] [1, 1, 40, 41] [1, 1, 1, 1] : tensor<1x1x40x41xf32> to tensor<1x1x40x41xf32> + %extracted_slice_47 = tensor.extract_slice %extracted_slice_46[0, 0, 0, 0] [1, 1, 40, 41] [1, 1, 1, 1] : tensor<1x1x40x41xf32> to tensor<1x1x40x41xf32> + %extracted_slice_48 = tensor.extract_slice %extracted_slice_47[0, 0, 0, 0] [1, 1, 40, 40] [1, 1, 1, 1] : tensor<1x1x40x41xf32> to tensor<1x1x40x40xf32> + %cst_49 = arith.constant 0.000000e+00 : f32 + %splat_50 = tensor.splat %cst_49 : tensor<40x40xf32> + %217 = tosa.reshape %extracted_slice_48 {new_shape = array} : (tensor<1x1x40x40xf32>) -> tensor<40x40xf32> + %218 = tosa.add %splat_50, %217 : (tensor<40x40xf32>, tensor<40x40xf32>) -> tensor<40x40xf32> + %219 = "tosa.const"() <{value = dense<[0, 1, 3, 2]> : tensor<4xi32>}> : () -> tensor<4xi32> + %220 = tosa.transpose %212, %219 : (tensor<1x32x40x128xf32>, tensor<4xi32>) -> tensor<1x32x128x40xf32> + %221 = tosa.reshape %206 {new_shape = array} : (tensor<1x32x40x128xf32>) -> tensor<32x40x128xf32> + %222 = tosa.reshape %220 
{new_shape = array} : (tensor<1x32x128x40xf32>) -> tensor<32x128x40xf32> + %223 = tosa.matmul %221, %222 : (tensor<32x40x128xf32>, tensor<32x128x40xf32>) -> tensor<32x40x40xf32> + %cst_51 = arith.constant 0.0883883461 : f32 + %splat_52 = tensor.splat %cst_51 : tensor<32x40x40xf32> + %224 = tosa.mul %223, %splat_52 {shift = 0 : i8} : (tensor<32x40x40xf32>, tensor<32x40x40xf32>) -> tensor<32x40x40xf32> + %225 = tosa.add %224, %218 : (tensor<32x40x40xf32>, tensor<40x40xf32>) -> tensor<32x40x40xf32> + %226 = tosa.reduce_max %225 {axis = 2 : i32} : (tensor<32x40x40xf32>) -> tensor<32x40x1xf32> + %227 = tosa.sub %225, %226 : (tensor<32x40x40xf32>, tensor<32x40x1xf32>) -> tensor<32x40x40xf32> + %228 = math.exp %227 : tensor<32x40x40xf32> + %229 = tosa.reduce_sum %228 {axis = 2 : i32} : (tensor<32x40x40xf32>) -> tensor<32x40x1xf32> + %230 = tosa.log %229 : (tensor<32x40x1xf32>) -> tensor<32x40x1xf32> + %231 = tosa.add %226, %230 : (tensor<32x40x1xf32>, tensor<32x40x1xf32>) -> tensor<32x40x1xf32> + %232 = tosa.sub %225, %231 : (tensor<32x40x40xf32>, tensor<32x40x1xf32>) -> tensor<32x40x40xf32> + %233 = math.exp %232 : tensor<32x40x40xf32> + %234 = tosa.reshape %231 {new_shape = array} : (tensor<32x40x1xf32>) -> tensor<1x32x40xf32> + %235 = tosa.reshape %198 {new_shape = array} : (tensor<1x32x40x128xf32>) -> tensor<32x40x128xf32> + %236 = tosa.matmul %233, %235 : (tensor<32x40x40xf32>, tensor<32x40x128xf32>) -> tensor<32x40x128xf32> + %237 = tosa.reshape %236 {new_shape = array} : (tensor<32x40x128xf32>) -> tensor<1x32x40x128xf32> + %238 = "tosa.const"() <{value = dense<[0, 2, 1, 3]> : tensor<4xi32>}> : () -> tensor<4xi32> + %239 = tosa.transpose %237, %238 : (tensor<1x32x40x128xf32>, tensor<4xi32>) -> tensor<1x40x32x128xf32> + %240 = tosa.reshape %239 {new_shape = array} : (tensor<1x40x32x128xf32>) -> tensor<1x40x4096xf32> + %241 = "tosa.const"() <{value = dense<[1, 0]> : tensor<2xi32>}> : () -> tensor<2xi32> + %242 = tosa.transpose %arg16, %241 : (tensor<4096x4096xf32>, 
tensor<2xi32>) -> tensor<4096x4096xf32> + %243 = tosa.reshape %240 {new_shape = array} : (tensor<1x40x4096xf32>) -> tensor<40x4096xf32> + %cst_53 = arith.constant dense<0.000000e+00> : tensor<40x4096xf32> + %244 = linalg.matmul {cast = #linalg.type_fn} ins(%243, %242 : tensor<40x4096xf32>, tensor<4096x4096xf32>) outs(%cst_53 : tensor<40x4096xf32>) -> tensor<40x4096xf32> + %245 = tosa.reshape %244 {new_shape = array} : (tensor<40x4096xf32>) -> tensor<1x40x4096xf32> + %246 = tosa.add %162, %245 : (tensor<1x40x4096xf32>, tensor<1x40x4096xf32>) -> tensor<1x40x4096xf32> + %247 = tensor.empty() : tensor<1x40x4096xf32> + %c2_i32_54 = arith.constant 2 : i32 + %248 = linalg.generic {indexing_maps = [#map4, #map4], iterator_types = ["parallel", "parallel", "parallel"]} ins(%246 : tensor<1x40x4096xf32>) outs(%247 : tensor<1x40x4096xf32>) { + ^bb0(%in: f32, %out: f32): + %3745 = math.fpowi %in, %c2_i32_54 : f32, i32 + linalg.yield %3745 : f32 + } -> tensor<1x40x4096xf32> + %249 = tosa.reduce_sum %248 {axis = 2 : i32} : (tensor<1x40x4096xf32>) -> tensor<1x40x1xf32> + %250 = "tosa.const"() <{value = dense<4.096000e+03> : tensor<1xf32>}> : () -> tensor<1xf32> + %251 = tosa.reciprocal %250 : (tensor<1xf32>) -> tensor<1xf32> + %252 = tosa.mul %251, %249 {shift = 0 : i8} : (tensor<1xf32>, tensor<1x40x1xf32>) -> tensor<1x40x1xf32> + %253 = "tosa.const"() <{value = dense<9.99999974E-6> : tensor<1x40x1xf32>}> : () -> tensor<1x40x1xf32> + %254 = tosa.add %252, %253 : (tensor<1x40x1xf32>, tensor<1x40x1xf32>) -> tensor<1x40x1xf32> + %255 = tosa.rsqrt %254 : (tensor<1x40x1xf32>) -> tensor<1x40x1xf32> + %256 = tosa.mul %246, %255 {shift = 0 : i8} : (tensor<1x40x4096xf32>, tensor<1x40x1xf32>) -> tensor<1x40x4096xf32> + %257 = tosa.reshape %arg17 {new_shape = array} : (tensor<4096xf32>) -> tensor<1x1x4096xf32> + %258 = tosa.mul %257, %256 {shift = 0 : i8} : (tensor<1x1x4096xf32>, tensor<1x40x4096xf32>) -> tensor<1x40x4096xf32> + %259 = "tosa.const"() <{value = dense<[1, 0]> : tensor<2xi32>}> 
: () -> tensor<2xi32> + %260 = tosa.transpose %arg18, %259 : (tensor<11008x4096xf32>, tensor<2xi32>) -> tensor<4096x11008xf32> + %261 = tosa.reshape %258 {new_shape = array} : (tensor<1x40x4096xf32>) -> tensor<40x4096xf32> + %cst_55 = arith.constant dense<0.000000e+00> : tensor<40x11008xf32> + %262 = linalg.matmul {cast = #linalg.type_fn} ins(%261, %260 : tensor<40x4096xf32>, tensor<4096x11008xf32>) outs(%cst_55 : tensor<40x11008xf32>) -> tensor<40x11008xf32> + %263 = tosa.reshape %262 {new_shape = array} : (tensor<40x11008xf32>) -> tensor<1x40x11008xf32> + %264 = tosa.sigmoid %263 : (tensor<1x40x11008xf32>) -> tensor<1x40x11008xf32> + %265 = tosa.mul %263, %264 {shift = 0 : i8} : (tensor<1x40x11008xf32>, tensor<1x40x11008xf32>) -> tensor<1x40x11008xf32> + %266 = "tosa.const"() <{value = dense<[1, 0]> : tensor<2xi32>}> : () -> tensor<2xi32> + %267 = tosa.transpose %arg19, %266 : (tensor<11008x4096xf32>, tensor<2xi32>) -> tensor<4096x11008xf32> + %268 = tosa.reshape %258 {new_shape = array} : (tensor<1x40x4096xf32>) -> tensor<40x4096xf32> + %cst_56 = arith.constant dense<0.000000e+00> : tensor<40x11008xf32> + %269 = linalg.matmul {cast = #linalg.type_fn} ins(%268, %267 : tensor<40x4096xf32>, tensor<4096x11008xf32>) outs(%cst_56 : tensor<40x11008xf32>) -> tensor<40x11008xf32> + %270 = tosa.reshape %269 {new_shape = array} : (tensor<40x11008xf32>) -> tensor<1x40x11008xf32> + %271 = tosa.mul %265, %270 {shift = 0 : i8} : (tensor<1x40x11008xf32>, tensor<1x40x11008xf32>) -> tensor<1x40x11008xf32> + %272 = "tosa.const"() <{value = dense<[1, 0]> : tensor<2xi32>}> : () -> tensor<2xi32> + %273 = tosa.transpose %arg20, %272 : (tensor<4096x11008xf32>, tensor<2xi32>) -> tensor<11008x4096xf32> + %274 = tosa.reshape %271 {new_shape = array} : (tensor<1x40x11008xf32>) -> tensor<40x11008xf32> + %cst_57 = arith.constant dense<0.000000e+00> : tensor<40x4096xf32> + %275 = linalg.matmul {cast = #linalg.type_fn} ins(%274, %273 : tensor<40x11008xf32>, tensor<11008x4096xf32>) outs(%cst_57 
: tensor<40x4096xf32>) -> tensor<40x4096xf32> + %276 = tosa.reshape %275 {new_shape = array} : (tensor<40x4096xf32>) -> tensor<1x40x4096xf32> + %277 = tosa.add %246, %276 : (tensor<1x40x4096xf32>, tensor<1x40x4096xf32>) -> tensor<1x40x4096xf32> + %278 = tensor.empty() : tensor<1x40x4096xf32> + %c2_i32_58 = arith.constant 2 : i32 + %279 = linalg.generic {indexing_maps = [#map4, #map4], iterator_types = ["parallel", "parallel", "parallel"]} ins(%277 : tensor<1x40x4096xf32>) outs(%278 : tensor<1x40x4096xf32>) { + ^bb0(%in: f32, %out: f32): + %3745 = math.fpowi %in, %c2_i32_58 : f32, i32 + linalg.yield %3745 : f32 + } -> tensor<1x40x4096xf32> + %280 = tosa.reduce_sum %279 {axis = 2 : i32} : (tensor<1x40x4096xf32>) -> tensor<1x40x1xf32> + %281 = "tosa.const"() <{value = dense<4.096000e+03> : tensor<1xf32>}> : () -> tensor<1xf32> + %282 = tosa.reciprocal %281 : (tensor<1xf32>) -> tensor<1xf32> + %283 = tosa.mul %282, %280 {shift = 0 : i8} : (tensor<1xf32>, tensor<1x40x1xf32>) -> tensor<1x40x1xf32> + %284 = "tosa.const"() <{value = dense<9.99999974E-6> : tensor<1x40x1xf32>}> : () -> tensor<1x40x1xf32> + %285 = tosa.add %283, %284 : (tensor<1x40x1xf32>, tensor<1x40x1xf32>) -> tensor<1x40x1xf32> + %286 = tosa.rsqrt %285 : (tensor<1x40x1xf32>) -> tensor<1x40x1xf32> + %287 = tosa.mul %277, %286 {shift = 0 : i8} : (tensor<1x40x4096xf32>, tensor<1x40x1xf32>) -> tensor<1x40x4096xf32> + %288 = tosa.reshape %arg21 {new_shape = array} : (tensor<4096xf32>) -> tensor<1x1x4096xf32> + %289 = tosa.mul %288, %287 {shift = 0 : i8} : (tensor<1x1x4096xf32>, tensor<1x40x4096xf32>) -> tensor<1x40x4096xf32> + %290 = "tosa.const"() <{value = dense<[1, 0]> : tensor<2xi32>}> : () -> tensor<2xi32> + %291 = tosa.transpose %arg22, %290 : (tensor<4096x4096xf32>, tensor<2xi32>) -> tensor<4096x4096xf32> + %292 = tosa.reshape %289 {new_shape = array} : (tensor<1x40x4096xf32>) -> tensor<40x4096xf32> + %cst_59 = arith.constant dense<0.000000e+00> : tensor<40x4096xf32> + %293 = linalg.matmul {cast = 
#linalg.type_fn} ins(%292, %291 : tensor<40x4096xf32>, tensor<4096x4096xf32>) outs(%cst_59 : tensor<40x4096xf32>) -> tensor<40x4096xf32> + %294 = tosa.reshape %293 {new_shape = array} : (tensor<40x4096xf32>) -> tensor<1x40x4096xf32> + %295 = "tosa.const"() <{value = dense<[1, 0]> : tensor<2xi32>}> : () -> tensor<2xi32> + %296 = tosa.transpose %arg23, %295 : (tensor<4096x4096xf32>, tensor<2xi32>) -> tensor<4096x4096xf32> + %297 = tosa.reshape %289 {new_shape = array} : (tensor<1x40x4096xf32>) -> tensor<40x4096xf32> + %cst_60 = arith.constant dense<0.000000e+00> : tensor<40x4096xf32> + %298 = linalg.matmul {cast = #linalg.type_fn} ins(%297, %296 : tensor<40x4096xf32>, tensor<4096x4096xf32>) outs(%cst_60 : tensor<40x4096xf32>) -> tensor<40x4096xf32> + %299 = tosa.reshape %298 {new_shape = array} : (tensor<40x4096xf32>) -> tensor<1x40x4096xf32> + %300 = "tosa.const"() <{value = dense<[1, 0]> : tensor<2xi32>}> : () -> tensor<2xi32> + %301 = tosa.transpose %arg24, %300 : (tensor<4096x4096xf32>, tensor<2xi32>) -> tensor<4096x4096xf32> + %302 = tosa.reshape %289 {new_shape = array} : (tensor<1x40x4096xf32>) -> tensor<40x4096xf32> + %cst_61 = arith.constant dense<0.000000e+00> : tensor<40x4096xf32> + %303 = linalg.matmul {cast = #linalg.type_fn} ins(%302, %301 : tensor<40x4096xf32>, tensor<4096x4096xf32>) outs(%cst_61 : tensor<40x4096xf32>) -> tensor<40x4096xf32> + %304 = tosa.reshape %303 {new_shape = array} : (tensor<40x4096xf32>) -> tensor<1x40x4096xf32> + %305 = tosa.reshape %294 {new_shape = array} : (tensor<1x40x4096xf32>) -> tensor<1x40x32x128xf32> + %306 = "tosa.const"() <{value = dense<[0, 2, 1, 3]> : tensor<4xi32>}> : () -> tensor<4xi32> + %307 = tosa.transpose %305, %306 : (tensor<1x40x32x128xf32>, tensor<4xi32>) -> tensor<1x32x40x128xf32> + %308 = tosa.reshape %299 {new_shape = array} : (tensor<1x40x4096xf32>) -> tensor<1x40x32x128xf32> + %309 = "tosa.const"() <{value = dense<[0, 2, 1, 3]> : tensor<4xi32>}> : () -> tensor<4xi32> + %310 = tosa.transpose %308, 
%309 : (tensor<1x40x32x128xf32>, tensor<4xi32>) -> tensor<1x32x40x128xf32> + %311 = tosa.reshape %304 {new_shape = array} : (tensor<1x40x4096xf32>) -> tensor<1x40x32x128xf32> + %312 = "tosa.const"() <{value = dense<[0, 2, 1, 3]> : tensor<4xi32>}> : () -> tensor<4xi32> + %313 = tosa.transpose %311, %312 : (tensor<1x40x32x128xf32>, tensor<4xi32>) -> tensor<1x32x40x128xf32> + %314 = tosa.reshape %45 {new_shape = array} : (tensor<1x40x128xf32>) -> tensor<1x1x40x128xf32> + %315 = tosa.reshape %47 {new_shape = array} : (tensor<1x40x128xf32>) -> tensor<1x1x40x128xf32> + %316 = tosa.mul %307, %314 {shift = 0 : i8} : (tensor<1x32x40x128xf32>, tensor<1x1x40x128xf32>) -> tensor<1x32x40x128xf32> + %extracted_slice_62 = tensor.extract_slice %307[0, 0, 0, 0] [1, 32, 40, 64] [1, 1, 1, 1] : tensor<1x32x40x128xf32> to tensor<1x32x40x64xf32> + %extracted_slice_63 = tensor.extract_slice %307[0, 0, 0, 64] [1, 32, 40, 64] [1, 1, 1, 1] : tensor<1x32x40x128xf32> to tensor<1x32x40x64xf32> + %317 = tensor.empty() : tensor<1x32x40x64xf32> + %318 = linalg.negf ins(%extracted_slice_63 : tensor<1x32x40x64xf32>) outs(%317 : tensor<1x32x40x64xf32>) -> tensor<1x32x40x64xf32> + %319 = tensor.empty() : tensor<1x32x40x128xf32> + %inserted_slice_64 = tensor.insert_slice %318 into %319[0, 0, 0, 0] [1, 32, 40, 64] [1, 1, 1, 1] : tensor<1x32x40x64xf32> into tensor<1x32x40x128xf32> + %inserted_slice_65 = tensor.insert_slice %extracted_slice_62 into %inserted_slice_64[0, 0, 0, 64] [1, 32, 40, 64] [1, 1, 1, 1] : tensor<1x32x40x64xf32> into tensor<1x32x40x128xf32> + %320 = tosa.mul %inserted_slice_65, %315 {shift = 0 : i8} : (tensor<1x32x40x128xf32>, tensor<1x1x40x128xf32>) -> tensor<1x32x40x128xf32> + %321 = tosa.add %316, %320 : (tensor<1x32x40x128xf32>, tensor<1x32x40x128xf32>) -> tensor<1x32x40x128xf32> + %322 = tosa.mul %310, %314 {shift = 0 : i8} : (tensor<1x32x40x128xf32>, tensor<1x1x40x128xf32>) -> tensor<1x32x40x128xf32> + %extracted_slice_66 = tensor.extract_slice %310[0, 0, 0, 0] [1, 32, 40, 64] 
[1, 1, 1, 1] : tensor<1x32x40x128xf32> to tensor<1x32x40x64xf32> + %extracted_slice_67 = tensor.extract_slice %310[0, 0, 0, 64] [1, 32, 40, 64] [1, 1, 1, 1] : tensor<1x32x40x128xf32> to tensor<1x32x40x64xf32> + %323 = tensor.empty() : tensor<1x32x40x64xf32> + %324 = linalg.negf ins(%extracted_slice_67 : tensor<1x32x40x64xf32>) outs(%323 : tensor<1x32x40x64xf32>) -> tensor<1x32x40x64xf32> + %325 = tensor.empty() : tensor<1x32x40x128xf32> + %inserted_slice_68 = tensor.insert_slice %324 into %325[0, 0, 0, 0] [1, 32, 40, 64] [1, 1, 1, 1] : tensor<1x32x40x64xf32> into tensor<1x32x40x128xf32> + %inserted_slice_69 = tensor.insert_slice %extracted_slice_66 into %inserted_slice_68[0, 0, 0, 64] [1, 32, 40, 64] [1, 1, 1, 1] : tensor<1x32x40x64xf32> into tensor<1x32x40x128xf32> + %326 = tosa.mul %inserted_slice_69, %315 {shift = 0 : i8} : (tensor<1x32x40x128xf32>, tensor<1x1x40x128xf32>) -> tensor<1x32x40x128xf32> + %327 = tosa.add %322, %326 : (tensor<1x32x40x128xf32>, tensor<1x32x40x128xf32>) -> tensor<1x32x40x128xf32> + %328 = tosa.reshape %19 {new_shape = array} : (tensor<40x41xf32>) -> tensor<1x40x41xf32> + %329 = tosa.reshape %328 {new_shape = array} : (tensor<1x40x41xf32>) -> tensor<1x1x40x41xf32> + %extracted_slice_70 = tensor.extract_slice %329[0, 0, 0, 0] [1, 1, 40, 41] [1, 1, 1, 1] : tensor<1x1x40x41xf32> to tensor<1x1x40x41xf32> + %extracted_slice_71 = tensor.extract_slice %extracted_slice_70[0, 0, 0, 0] [1, 1, 40, 41] [1, 1, 1, 1] : tensor<1x1x40x41xf32> to tensor<1x1x40x41xf32> + %330 = "tosa.const"() <{value = dense<0.000000e+00> : tensor<1x1x40x41xf32>}> : () -> tensor<1x1x40x41xf32> + %331 = tosa.add %extracted_slice_71, %330 : (tensor<1x1x40x41xf32>, tensor<1x1x40x41xf32>) -> tensor<1x1x40x41xf32> + %extracted_slice_72 = tensor.extract_slice %331[0, 0, 0, 0] [1, 1, 40, 41] [1, 1, 1, 1] : tensor<1x1x40x41xf32> to tensor<1x1x40x41xf32> + %extracted_slice_73 = tensor.extract_slice %extracted_slice_72[0, 0, 0, 0] [1, 1, 40, 41] [1, 1, 1, 1] : 
tensor<1x1x40x41xf32> to tensor<1x1x40x41xf32> + %extracted_slice_74 = tensor.extract_slice %extracted_slice_73[0, 0, 0, 0] [1, 1, 40, 41] [1, 1, 1, 1] : tensor<1x1x40x41xf32> to tensor<1x1x40x41xf32> + %extracted_slice_75 = tensor.extract_slice %extracted_slice_74[0, 0, 0, 0] [1, 1, 40, 40] [1, 1, 1, 1] : tensor<1x1x40x41xf32> to tensor<1x1x40x40xf32> + %cst_76 = arith.constant 0.000000e+00 : f32 + %splat_77 = tensor.splat %cst_76 : tensor<40x40xf32> + %332 = tosa.reshape %extracted_slice_75 {new_shape = array} : (tensor<1x1x40x40xf32>) -> tensor<40x40xf32> + %333 = tosa.add %splat_77, %332 : (tensor<40x40xf32>, tensor<40x40xf32>) -> tensor<40x40xf32> + %334 = "tosa.const"() <{value = dense<[0, 1, 3, 2]> : tensor<4xi32>}> : () -> tensor<4xi32> + %335 = tosa.transpose %327, %334 : (tensor<1x32x40x128xf32>, tensor<4xi32>) -> tensor<1x32x128x40xf32> + %336 = tosa.reshape %321 {new_shape = array} : (tensor<1x32x40x128xf32>) -> tensor<32x40x128xf32> + %337 = tosa.reshape %335 {new_shape = array} : (tensor<1x32x128x40xf32>) -> tensor<32x128x40xf32> + %338 = tosa.matmul %336, %337 : (tensor<32x40x128xf32>, tensor<32x128x40xf32>) -> tensor<32x40x40xf32> + %cst_78 = arith.constant 0.0883883461 : f32 + %splat_79 = tensor.splat %cst_78 : tensor<32x40x40xf32> + %339 = tosa.mul %338, %splat_79 {shift = 0 : i8} : (tensor<32x40x40xf32>, tensor<32x40x40xf32>) -> tensor<32x40x40xf32> + %340 = tosa.add %339, %333 : (tensor<32x40x40xf32>, tensor<40x40xf32>) -> tensor<32x40x40xf32> + %341 = tosa.reduce_max %340 {axis = 2 : i32} : (tensor<32x40x40xf32>) -> tensor<32x40x1xf32> + %342 = tosa.sub %340, %341 : (tensor<32x40x40xf32>, tensor<32x40x1xf32>) -> tensor<32x40x40xf32> + %343 = math.exp %342 : tensor<32x40x40xf32> + %344 = tosa.reduce_sum %343 {axis = 2 : i32} : (tensor<32x40x40xf32>) -> tensor<32x40x1xf32> + %345 = tosa.log %344 : (tensor<32x40x1xf32>) -> tensor<32x40x1xf32> + %346 = tosa.add %341, %345 : (tensor<32x40x1xf32>, tensor<32x40x1xf32>) -> tensor<32x40x1xf32> + %347 = 
tosa.sub %340, %346 : (tensor<32x40x40xf32>, tensor<32x40x1xf32>) -> tensor<32x40x40xf32> + %348 = math.exp %347 : tensor<32x40x40xf32> + %349 = tosa.reshape %346 {new_shape = array} : (tensor<32x40x1xf32>) -> tensor<1x32x40xf32> + %350 = tosa.reshape %313 {new_shape = array} : (tensor<1x32x40x128xf32>) -> tensor<32x40x128xf32> + %351 = tosa.matmul %348, %350 : (tensor<32x40x40xf32>, tensor<32x40x128xf32>) -> tensor<32x40x128xf32> + %352 = tosa.reshape %351 {new_shape = array} : (tensor<32x40x128xf32>) -> tensor<1x32x40x128xf32> + %353 = "tosa.const"() <{value = dense<[0, 2, 1, 3]> : tensor<4xi32>}> : () -> tensor<4xi32> + %354 = tosa.transpose %352, %353 : (tensor<1x32x40x128xf32>, tensor<4xi32>) -> tensor<1x40x32x128xf32> + %355 = tosa.reshape %354 {new_shape = array} : (tensor<1x40x32x128xf32>) -> tensor<1x40x4096xf32> + %356 = "tosa.const"() <{value = dense<[1, 0]> : tensor<2xi32>}> : () -> tensor<2xi32> + %357 = tosa.transpose %arg25, %356 : (tensor<4096x4096xf32>, tensor<2xi32>) -> tensor<4096x4096xf32> + %358 = tosa.reshape %355 {new_shape = array} : (tensor<1x40x4096xf32>) -> tensor<40x4096xf32> + %cst_80 = arith.constant dense<0.000000e+00> : tensor<40x4096xf32> + %359 = linalg.matmul {cast = #linalg.type_fn} ins(%358, %357 : tensor<40x4096xf32>, tensor<4096x4096xf32>) outs(%cst_80 : tensor<40x4096xf32>) -> tensor<40x4096xf32> + %360 = tosa.reshape %359 {new_shape = array} : (tensor<40x4096xf32>) -> tensor<1x40x4096xf32> + %361 = tosa.add %277, %360 : (tensor<1x40x4096xf32>, tensor<1x40x4096xf32>) -> tensor<1x40x4096xf32> + %362 = tensor.empty() : tensor<1x40x4096xf32> + %c2_i32_81 = arith.constant 2 : i32 + %363 = linalg.generic {indexing_maps = [#map4, #map4], iterator_types = ["parallel", "parallel", "parallel"]} ins(%361 : tensor<1x40x4096xf32>) outs(%362 : tensor<1x40x4096xf32>) { + ^bb0(%in: f32, %out: f32): + %3745 = math.fpowi %in, %c2_i32_81 : f32, i32 + linalg.yield %3745 : f32 + } -> tensor<1x40x4096xf32> + %364 = tosa.reduce_sum %363 {axis = 2 
: i32} : (tensor<1x40x4096xf32>) -> tensor<1x40x1xf32> + %365 = "tosa.const"() <{value = dense<4.096000e+03> : tensor<1xf32>}> : () -> tensor<1xf32> + %366 = tosa.reciprocal %365 : (tensor<1xf32>) -> tensor<1xf32> + %367 = tosa.mul %366, %364 {shift = 0 : i8} : (tensor<1xf32>, tensor<1x40x1xf32>) -> tensor<1x40x1xf32> + %368 = "tosa.const"() <{value = dense<9.99999974E-6> : tensor<1x40x1xf32>}> : () -> tensor<1x40x1xf32> + %369 = tosa.add %367, %368 : (tensor<1x40x1xf32>, tensor<1x40x1xf32>) -> tensor<1x40x1xf32> + %370 = tosa.rsqrt %369 : (tensor<1x40x1xf32>) -> tensor<1x40x1xf32> + %371 = tosa.mul %361, %370 {shift = 0 : i8} : (tensor<1x40x4096xf32>, tensor<1x40x1xf32>) -> tensor<1x40x4096xf32> + %372 = tosa.reshape %arg26 {new_shape = array} : (tensor<4096xf32>) -> tensor<1x1x4096xf32> + %373 = tosa.mul %372, %371 {shift = 0 : i8} : (tensor<1x1x4096xf32>, tensor<1x40x4096xf32>) -> tensor<1x40x4096xf32> + %374 = "tosa.const"() <{value = dense<[1, 0]> : tensor<2xi32>}> : () -> tensor<2xi32> + %375 = tosa.transpose %arg27, %374 : (tensor<11008x4096xf32>, tensor<2xi32>) -> tensor<4096x11008xf32> + %376 = tosa.reshape %373 {new_shape = array} : (tensor<1x40x4096xf32>) -> tensor<40x4096xf32> + %cst_82 = arith.constant dense<0.000000e+00> : tensor<40x11008xf32> + %377 = linalg.matmul {cast = #linalg.type_fn} ins(%376, %375 : tensor<40x4096xf32>, tensor<4096x11008xf32>) outs(%cst_82 : tensor<40x11008xf32>) -> tensor<40x11008xf32> + %378 = tosa.reshape %377 {new_shape = array} : (tensor<40x11008xf32>) -> tensor<1x40x11008xf32> + %379 = tosa.sigmoid %378 : (tensor<1x40x11008xf32>) -> tensor<1x40x11008xf32> + %380 = tosa.mul %378, %379 {shift = 0 : i8} : (tensor<1x40x11008xf32>, tensor<1x40x11008xf32>) -> tensor<1x40x11008xf32> + %381 = "tosa.const"() <{value = dense<[1, 0]> : tensor<2xi32>}> : () -> tensor<2xi32> + %382 = tosa.transpose %arg28, %381 : (tensor<11008x4096xf32>, tensor<2xi32>) -> tensor<4096x11008xf32> + %383 = tosa.reshape %373 {new_shape = array} : 
(tensor<1x40x4096xf32>) -> tensor<40x4096xf32> + %cst_83 = arith.constant dense<0.000000e+00> : tensor<40x11008xf32> + %384 = linalg.matmul {cast = #linalg.type_fn} ins(%383, %382 : tensor<40x4096xf32>, tensor<4096x11008xf32>) outs(%cst_83 : tensor<40x11008xf32>) -> tensor<40x11008xf32> + %385 = tosa.reshape %384 {new_shape = array} : (tensor<40x11008xf32>) -> tensor<1x40x11008xf32> + %386 = tosa.mul %380, %385 {shift = 0 : i8} : (tensor<1x40x11008xf32>, tensor<1x40x11008xf32>) -> tensor<1x40x11008xf32> + %387 = "tosa.const"() <{value = dense<[1, 0]> : tensor<2xi32>}> : () -> tensor<2xi32> + %388 = tosa.transpose %arg29, %387 : (tensor<4096x11008xf32>, tensor<2xi32>) -> tensor<11008x4096xf32> + %389 = tosa.reshape %386 {new_shape = array} : (tensor<1x40x11008xf32>) -> tensor<40x11008xf32> + %cst_84 = arith.constant dense<0.000000e+00> : tensor<40x4096xf32> + %390 = linalg.matmul {cast = #linalg.type_fn} ins(%389, %388 : tensor<40x11008xf32>, tensor<11008x4096xf32>) outs(%cst_84 : tensor<40x4096xf32>) -> tensor<40x4096xf32> + %391 = tosa.reshape %390 {new_shape = array} : (tensor<40x4096xf32>) -> tensor<1x40x4096xf32> + %392 = tosa.add %361, %391 : (tensor<1x40x4096xf32>, tensor<1x40x4096xf32>) -> tensor<1x40x4096xf32> + %393 = tensor.empty() : tensor<1x40x4096xf32> + %c2_i32_85 = arith.constant 2 : i32 + %394 = linalg.generic {indexing_maps = [#map4, #map4], iterator_types = ["parallel", "parallel", "parallel"]} ins(%392 : tensor<1x40x4096xf32>) outs(%393 : tensor<1x40x4096xf32>) { + ^bb0(%in: f32, %out: f32): + %3745 = math.fpowi %in, %c2_i32_85 : f32, i32 + linalg.yield %3745 : f32 + } -> tensor<1x40x4096xf32> + %395 = tosa.reduce_sum %394 {axis = 2 : i32} : (tensor<1x40x4096xf32>) -> tensor<1x40x1xf32> + %396 = "tosa.const"() <{value = dense<4.096000e+03> : tensor<1xf32>}> : () -> tensor<1xf32> + %397 = tosa.reciprocal %396 : (tensor<1xf32>) -> tensor<1xf32> + %398 = tosa.mul %397, %395 {shift = 0 : i8} : (tensor<1xf32>, tensor<1x40x1xf32>) -> tensor<1x40x1xf32> 
+ %399 = "tosa.const"() <{value = dense<9.99999974E-6> : tensor<1x40x1xf32>}> : () -> tensor<1x40x1xf32> + %400 = tosa.add %398, %399 : (tensor<1x40x1xf32>, tensor<1x40x1xf32>) -> tensor<1x40x1xf32> + %401 = tosa.rsqrt %400 : (tensor<1x40x1xf32>) -> tensor<1x40x1xf32> + %402 = tosa.mul %392, %401 {shift = 0 : i8} : (tensor<1x40x4096xf32>, tensor<1x40x1xf32>) -> tensor<1x40x4096xf32> + %403 = tosa.reshape %arg30 {new_shape = array} : (tensor<4096xf32>) -> tensor<1x1x4096xf32> + %404 = tosa.mul %403, %402 {shift = 0 : i8} : (tensor<1x1x4096xf32>, tensor<1x40x4096xf32>) -> tensor<1x40x4096xf32> + %405 = "tosa.const"() <{value = dense<[1, 0]> : tensor<2xi32>}> : () -> tensor<2xi32> + %406 = tosa.transpose %arg31, %405 : (tensor<4096x4096xf32>, tensor<2xi32>) -> tensor<4096x4096xf32> + %407 = tosa.reshape %404 {new_shape = array} : (tensor<1x40x4096xf32>) -> tensor<40x4096xf32> + %cst_86 = arith.constant dense<0.000000e+00> : tensor<40x4096xf32> + %408 = linalg.matmul {cast = #linalg.type_fn} ins(%407, %406 : tensor<40x4096xf32>, tensor<4096x4096xf32>) outs(%cst_86 : tensor<40x4096xf32>) -> tensor<40x4096xf32> + %409 = tosa.reshape %408 {new_shape = array} : (tensor<40x4096xf32>) -> tensor<1x40x4096xf32> + %410 = "tosa.const"() <{value = dense<[1, 0]> : tensor<2xi32>}> : () -> tensor<2xi32> + %411 = tosa.transpose %arg32, %410 : (tensor<4096x4096xf32>, tensor<2xi32>) -> tensor<4096x4096xf32> + %412 = tosa.reshape %404 {new_shape = array} : (tensor<1x40x4096xf32>) -> tensor<40x4096xf32> + %cst_87 = arith.constant dense<0.000000e+00> : tensor<40x4096xf32> + %413 = linalg.matmul {cast = #linalg.type_fn} ins(%412, %411 : tensor<40x4096xf32>, tensor<4096x4096xf32>) outs(%cst_87 : tensor<40x4096xf32>) -> tensor<40x4096xf32> + %414 = tosa.reshape %413 {new_shape = array} : (tensor<40x4096xf32>) -> tensor<1x40x4096xf32> + %415 = "tosa.const"() <{value = dense<[1, 0]> : tensor<2xi32>}> : () -> tensor<2xi32> + %416 = tosa.transpose %arg33, %415 : (tensor<4096x4096xf32>, 
tensor<2xi32>) -> tensor<4096x4096xf32> + %417 = tosa.reshape %404 {new_shape = array} : (tensor<1x40x4096xf32>) -> tensor<40x4096xf32> + %cst_88 = arith.constant dense<0.000000e+00> : tensor<40x4096xf32> + %418 = linalg.matmul {cast = #linalg.type_fn} ins(%417, %416 : tensor<40x4096xf32>, tensor<4096x4096xf32>) outs(%cst_88 : tensor<40x4096xf32>) -> tensor<40x4096xf32> + %419 = tosa.reshape %418 {new_shape = array} : (tensor<40x4096xf32>) -> tensor<1x40x4096xf32> + %420 = tosa.reshape %409 {new_shape = array} : (tensor<1x40x4096xf32>) -> tensor<1x40x32x128xf32> + %421 = "tosa.const"() <{value = dense<[0, 2, 1, 3]> : tensor<4xi32>}> : () -> tensor<4xi32> + %422 = tosa.transpose %420, %421 : (tensor<1x40x32x128xf32>, tensor<4xi32>) -> tensor<1x32x40x128xf32> + %423 = tosa.reshape %414 {new_shape = array} : (tensor<1x40x4096xf32>) -> tensor<1x40x32x128xf32> + %424 = "tosa.const"() <{value = dense<[0, 2, 1, 3]> : tensor<4xi32>}> : () -> tensor<4xi32> + %425 = tosa.transpose %423, %424 : (tensor<1x40x32x128xf32>, tensor<4xi32>) -> tensor<1x32x40x128xf32> + %426 = tosa.reshape %419 {new_shape = array} : (tensor<1x40x4096xf32>) -> tensor<1x40x32x128xf32> + %427 = "tosa.const"() <{value = dense<[0, 2, 1, 3]> : tensor<4xi32>}> : () -> tensor<4xi32> + %428 = tosa.transpose %426, %427 : (tensor<1x40x32x128xf32>, tensor<4xi32>) -> tensor<1x32x40x128xf32> + %429 = tosa.reshape %45 {new_shape = array} : (tensor<1x40x128xf32>) -> tensor<1x1x40x128xf32> + %430 = tosa.reshape %47 {new_shape = array} : (tensor<1x40x128xf32>) -> tensor<1x1x40x128xf32> + %431 = tosa.mul %422, %429 {shift = 0 : i8} : (tensor<1x32x40x128xf32>, tensor<1x1x40x128xf32>) -> tensor<1x32x40x128xf32> + %extracted_slice_89 = tensor.extract_slice %422[0, 0, 0, 0] [1, 32, 40, 64] [1, 1, 1, 1] : tensor<1x32x40x128xf32> to tensor<1x32x40x64xf32> + %extracted_slice_90 = tensor.extract_slice %422[0, 0, 0, 64] [1, 32, 40, 64] [1, 1, 1, 1] : tensor<1x32x40x128xf32> to tensor<1x32x40x64xf32> + %432 = tensor.empty() : 
tensor<1x32x40x64xf32> + %433 = linalg.negf ins(%extracted_slice_90 : tensor<1x32x40x64xf32>) outs(%432 : tensor<1x32x40x64xf32>) -> tensor<1x32x40x64xf32> + %434 = tensor.empty() : tensor<1x32x40x128xf32> + %inserted_slice_91 = tensor.insert_slice %433 into %434[0, 0, 0, 0] [1, 32, 40, 64] [1, 1, 1, 1] : tensor<1x32x40x64xf32> into tensor<1x32x40x128xf32> + %inserted_slice_92 = tensor.insert_slice %extracted_slice_89 into %inserted_slice_91[0, 0, 0, 64] [1, 32, 40, 64] [1, 1, 1, 1] : tensor<1x32x40x64xf32> into tensor<1x32x40x128xf32> + %435 = tosa.mul %inserted_slice_92, %430 {shift = 0 : i8} : (tensor<1x32x40x128xf32>, tensor<1x1x40x128xf32>) -> tensor<1x32x40x128xf32> + %436 = tosa.add %431, %435 : (tensor<1x32x40x128xf32>, tensor<1x32x40x128xf32>) -> tensor<1x32x40x128xf32> + %437 = tosa.mul %425, %429 {shift = 0 : i8} : (tensor<1x32x40x128xf32>, tensor<1x1x40x128xf32>) -> tensor<1x32x40x128xf32> + %extracted_slice_93 = tensor.extract_slice %425[0, 0, 0, 0] [1, 32, 40, 64] [1, 1, 1, 1] : tensor<1x32x40x128xf32> to tensor<1x32x40x64xf32> + %extracted_slice_94 = tensor.extract_slice %425[0, 0, 0, 64] [1, 32, 40, 64] [1, 1, 1, 1] : tensor<1x32x40x128xf32> to tensor<1x32x40x64xf32> + %438 = tensor.empty() : tensor<1x32x40x64xf32> + %439 = linalg.negf ins(%extracted_slice_94 : tensor<1x32x40x64xf32>) outs(%438 : tensor<1x32x40x64xf32>) -> tensor<1x32x40x64xf32> + %440 = tensor.empty() : tensor<1x32x40x128xf32> + %inserted_slice_95 = tensor.insert_slice %439 into %440[0, 0, 0, 0] [1, 32, 40, 64] [1, 1, 1, 1] : tensor<1x32x40x64xf32> into tensor<1x32x40x128xf32> + %inserted_slice_96 = tensor.insert_slice %extracted_slice_93 into %inserted_slice_95[0, 0, 0, 64] [1, 32, 40, 64] [1, 1, 1, 1] : tensor<1x32x40x64xf32> into tensor<1x32x40x128xf32> + %441 = tosa.mul %inserted_slice_96, %430 {shift = 0 : i8} : (tensor<1x32x40x128xf32>, tensor<1x1x40x128xf32>) -> tensor<1x32x40x128xf32> + %442 = tosa.add %437, %441 : (tensor<1x32x40x128xf32>, tensor<1x32x40x128xf32>) -> 
tensor<1x32x40x128xf32> + %443 = tosa.reshape %19 {new_shape = array} : (tensor<40x41xf32>) -> tensor<1x40x41xf32> + %444 = tosa.reshape %443 {new_shape = array} : (tensor<1x40x41xf32>) -> tensor<1x1x40x41xf32> + %extracted_slice_97 = tensor.extract_slice %444[0, 0, 0, 0] [1, 1, 40, 41] [1, 1, 1, 1] : tensor<1x1x40x41xf32> to tensor<1x1x40x41xf32> + %extracted_slice_98 = tensor.extract_slice %extracted_slice_97[0, 0, 0, 0] [1, 1, 40, 41] [1, 1, 1, 1] : tensor<1x1x40x41xf32> to tensor<1x1x40x41xf32> + %445 = "tosa.const"() <{value = dense<0.000000e+00> : tensor<1x1x40x41xf32>}> : () -> tensor<1x1x40x41xf32> + %446 = tosa.add %extracted_slice_98, %445 : (tensor<1x1x40x41xf32>, tensor<1x1x40x41xf32>) -> tensor<1x1x40x41xf32> + %extracted_slice_99 = tensor.extract_slice %446[0, 0, 0, 0] [1, 1, 40, 41] [1, 1, 1, 1] : tensor<1x1x40x41xf32> to tensor<1x1x40x41xf32> + %extracted_slice_100 = tensor.extract_slice %extracted_slice_99[0, 0, 0, 0] [1, 1, 40, 41] [1, 1, 1, 1] : tensor<1x1x40x41xf32> to tensor<1x1x40x41xf32> + %extracted_slice_101 = tensor.extract_slice %extracted_slice_100[0, 0, 0, 0] [1, 1, 40, 41] [1, 1, 1, 1] : tensor<1x1x40x41xf32> to tensor<1x1x40x41xf32> + %extracted_slice_102 = tensor.extract_slice %extracted_slice_101[0, 0, 0, 0] [1, 1, 40, 40] [1, 1, 1, 1] : tensor<1x1x40x41xf32> to tensor<1x1x40x40xf32> + %cst_103 = arith.constant 0.000000e+00 : f32 + %splat_104 = tensor.splat %cst_103 : tensor<40x40xf32> + %447 = tosa.reshape %extracted_slice_102 {new_shape = array} : (tensor<1x1x40x40xf32>) -> tensor<40x40xf32> + %448 = tosa.add %splat_104, %447 : (tensor<40x40xf32>, tensor<40x40xf32>) -> tensor<40x40xf32> + %449 = "tosa.const"() <{value = dense<[0, 1, 3, 2]> : tensor<4xi32>}> : () -> tensor<4xi32> + %450 = tosa.transpose %442, %449 : (tensor<1x32x40x128xf32>, tensor<4xi32>) -> tensor<1x32x128x40xf32> + %451 = tosa.reshape %436 {new_shape = array} : (tensor<1x32x40x128xf32>) -> tensor<32x40x128xf32> + %452 = tosa.reshape %450 {new_shape = array} : 
(tensor<1x32x128x40xf32>) -> tensor<32x128x40xf32> + %453 = tosa.matmul %451, %452 : (tensor<32x40x128xf32>, tensor<32x128x40xf32>) -> tensor<32x40x40xf32> + %cst_105 = arith.constant 0.0883883461 : f32 + %splat_106 = tensor.splat %cst_105 : tensor<32x40x40xf32> + %454 = tosa.mul %453, %splat_106 {shift = 0 : i8} : (tensor<32x40x40xf32>, tensor<32x40x40xf32>) -> tensor<32x40x40xf32> + %455 = tosa.add %454, %448 : (tensor<32x40x40xf32>, tensor<40x40xf32>) -> tensor<32x40x40xf32> + %456 = tosa.reduce_max %455 {axis = 2 : i32} : (tensor<32x40x40xf32>) -> tensor<32x40x1xf32> + %457 = tosa.sub %455, %456 : (tensor<32x40x40xf32>, tensor<32x40x1xf32>) -> tensor<32x40x40xf32> + %458 = math.exp %457 : tensor<32x40x40xf32> + %459 = tosa.reduce_sum %458 {axis = 2 : i32} : (tensor<32x40x40xf32>) -> tensor<32x40x1xf32> + %460 = tosa.log %459 : (tensor<32x40x1xf32>) -> tensor<32x40x1xf32> + %461 = tosa.add %456, %460 : (tensor<32x40x1xf32>, tensor<32x40x1xf32>) -> tensor<32x40x1xf32> + %462 = tosa.sub %455, %461 : (tensor<32x40x40xf32>, tensor<32x40x1xf32>) -> tensor<32x40x40xf32> + %463 = math.exp %462 : tensor<32x40x40xf32> + %464 = tosa.reshape %461 {new_shape = array} : (tensor<32x40x1xf32>) -> tensor<1x32x40xf32> + %465 = tosa.reshape %428 {new_shape = array} : (tensor<1x32x40x128xf32>) -> tensor<32x40x128xf32> + %466 = tosa.matmul %463, %465 : (tensor<32x40x40xf32>, tensor<32x40x128xf32>) -> tensor<32x40x128xf32> + %467 = tosa.reshape %466 {new_shape = array} : (tensor<32x40x128xf32>) -> tensor<1x32x40x128xf32> + %468 = "tosa.const"() <{value = dense<[0, 2, 1, 3]> : tensor<4xi32>}> : () -> tensor<4xi32> + %469 = tosa.transpose %467, %468 : (tensor<1x32x40x128xf32>, tensor<4xi32>) -> tensor<1x40x32x128xf32> + %470 = tosa.reshape %469 {new_shape = array} : (tensor<1x40x32x128xf32>) -> tensor<1x40x4096xf32> + %471 = "tosa.const"() <{value = dense<[1, 0]> : tensor<2xi32>}> : () -> tensor<2xi32> + %472 = tosa.transpose %arg34, %471 : (tensor<4096x4096xf32>, tensor<2xi32>) -> 
tensor<4096x4096xf32> + %473 = tosa.reshape %470 {new_shape = array} : (tensor<1x40x4096xf32>) -> tensor<40x4096xf32> + %cst_107 = arith.constant dense<0.000000e+00> : tensor<40x4096xf32> + %474 = linalg.matmul {cast = #linalg.type_fn} ins(%473, %472 : tensor<40x4096xf32>, tensor<4096x4096xf32>) outs(%cst_107 : tensor<40x4096xf32>) -> tensor<40x4096xf32> + %475 = tosa.reshape %474 {new_shape = array} : (tensor<40x4096xf32>) -> tensor<1x40x4096xf32> + %476 = tosa.add %392, %475 : (tensor<1x40x4096xf32>, tensor<1x40x4096xf32>) -> tensor<1x40x4096xf32> + %477 = tensor.empty() : tensor<1x40x4096xf32> + %c2_i32_108 = arith.constant 2 : i32 + %478 = linalg.generic {indexing_maps = [#map4, #map4], iterator_types = ["parallel", "parallel", "parallel"]} ins(%476 : tensor<1x40x4096xf32>) outs(%477 : tensor<1x40x4096xf32>) { + ^bb0(%in: f32, %out: f32): + %3745 = math.fpowi %in, %c2_i32_108 : f32, i32 + linalg.yield %3745 : f32 + } -> tensor<1x40x4096xf32> + %479 = tosa.reduce_sum %478 {axis = 2 : i32} : (tensor<1x40x4096xf32>) -> tensor<1x40x1xf32> + %480 = "tosa.const"() <{value = dense<4.096000e+03> : tensor<1xf32>}> : () -> tensor<1xf32> + %481 = tosa.reciprocal %480 : (tensor<1xf32>) -> tensor<1xf32> + %482 = tosa.mul %481, %479 {shift = 0 : i8} : (tensor<1xf32>, tensor<1x40x1xf32>) -> tensor<1x40x1xf32> + %483 = "tosa.const"() <{value = dense<9.99999974E-6> : tensor<1x40x1xf32>}> : () -> tensor<1x40x1xf32> + %484 = tosa.add %482, %483 : (tensor<1x40x1xf32>, tensor<1x40x1xf32>) -> tensor<1x40x1xf32> + %485 = tosa.rsqrt %484 : (tensor<1x40x1xf32>) -> tensor<1x40x1xf32> + %486 = tosa.mul %476, %485 {shift = 0 : i8} : (tensor<1x40x4096xf32>, tensor<1x40x1xf32>) -> tensor<1x40x4096xf32> + %487 = tosa.reshape %arg35 {new_shape = array} : (tensor<4096xf32>) -> tensor<1x1x4096xf32> + %488 = tosa.mul %487, %486 {shift = 0 : i8} : (tensor<1x1x4096xf32>, tensor<1x40x4096xf32>) -> tensor<1x40x4096xf32> + %489 = "tosa.const"() <{value = dense<[1, 0]> : tensor<2xi32>}> : () -> 
tensor<2xi32> + %490 = tosa.transpose %arg36, %489 : (tensor<11008x4096xf32>, tensor<2xi32>) -> tensor<4096x11008xf32> + %491 = tosa.reshape %488 {new_shape = array} : (tensor<1x40x4096xf32>) -> tensor<40x4096xf32> + %cst_109 = arith.constant dense<0.000000e+00> : tensor<40x11008xf32> + %492 = linalg.matmul {cast = #linalg.type_fn} ins(%491, %490 : tensor<40x4096xf32>, tensor<4096x11008xf32>) outs(%cst_109 : tensor<40x11008xf32>) -> tensor<40x11008xf32> + %493 = tosa.reshape %492 {new_shape = array} : (tensor<40x11008xf32>) -> tensor<1x40x11008xf32> + %494 = tosa.sigmoid %493 : (tensor<1x40x11008xf32>) -> tensor<1x40x11008xf32> + %495 = tosa.mul %493, %494 {shift = 0 : i8} : (tensor<1x40x11008xf32>, tensor<1x40x11008xf32>) -> tensor<1x40x11008xf32> + %496 = "tosa.const"() <{value = dense<[1, 0]> : tensor<2xi32>}> : () -> tensor<2xi32> + %497 = tosa.transpose %arg37, %496 : (tensor<11008x4096xf32>, tensor<2xi32>) -> tensor<4096x11008xf32> + %498 = tosa.reshape %488 {new_shape = array} : (tensor<1x40x4096xf32>) -> tensor<40x4096xf32> + %cst_110 = arith.constant dense<0.000000e+00> : tensor<40x11008xf32> + %499 = linalg.matmul {cast = #linalg.type_fn} ins(%498, %497 : tensor<40x4096xf32>, tensor<4096x11008xf32>) outs(%cst_110 : tensor<40x11008xf32>) -> tensor<40x11008xf32> + %500 = tosa.reshape %499 {new_shape = array} : (tensor<40x11008xf32>) -> tensor<1x40x11008xf32> + %501 = tosa.mul %495, %500 {shift = 0 : i8} : (tensor<1x40x11008xf32>, tensor<1x40x11008xf32>) -> tensor<1x40x11008xf32> + %502 = "tosa.const"() <{value = dense<[1, 0]> : tensor<2xi32>}> : () -> tensor<2xi32> + %503 = tosa.transpose %arg38, %502 : (tensor<4096x11008xf32>, tensor<2xi32>) -> tensor<11008x4096xf32> + %504 = tosa.reshape %501 {new_shape = array} : (tensor<1x40x11008xf32>) -> tensor<40x11008xf32> + %cst_111 = arith.constant dense<0.000000e+00> : tensor<40x4096xf32> + %505 = linalg.matmul {cast = #linalg.type_fn} ins(%504, %503 : tensor<40x11008xf32>, tensor<11008x4096xf32>) outs(%cst_111 : 
tensor<40x4096xf32>) -> tensor<40x4096xf32> + %506 = tosa.reshape %505 {new_shape = array} : (tensor<40x4096xf32>) -> tensor<1x40x4096xf32> + %507 = tosa.add %476, %506 : (tensor<1x40x4096xf32>, tensor<1x40x4096xf32>) -> tensor<1x40x4096xf32> + %508 = tensor.empty() : tensor<1x40x4096xf32> + %c2_i32_112 = arith.constant 2 : i32 + %509 = linalg.generic {indexing_maps = [#map4, #map4], iterator_types = ["parallel", "parallel", "parallel"]} ins(%507 : tensor<1x40x4096xf32>) outs(%508 : tensor<1x40x4096xf32>) { + ^bb0(%in: f32, %out: f32): + %3745 = math.fpowi %in, %c2_i32_112 : f32, i32 + linalg.yield %3745 : f32 + } -> tensor<1x40x4096xf32> + %510 = tosa.reduce_sum %509 {axis = 2 : i32} : (tensor<1x40x4096xf32>) -> tensor<1x40x1xf32> + %511 = "tosa.const"() <{value = dense<4.096000e+03> : tensor<1xf32>}> : () -> tensor<1xf32> + %512 = tosa.reciprocal %511 : (tensor<1xf32>) -> tensor<1xf32> + %513 = tosa.mul %512, %510 {shift = 0 : i8} : (tensor<1xf32>, tensor<1x40x1xf32>) -> tensor<1x40x1xf32> + %514 = "tosa.const"() <{value = dense<9.99999974E-6> : tensor<1x40x1xf32>}> : () -> tensor<1x40x1xf32> + %515 = tosa.add %513, %514 : (tensor<1x40x1xf32>, tensor<1x40x1xf32>) -> tensor<1x40x1xf32> + %516 = tosa.rsqrt %515 : (tensor<1x40x1xf32>) -> tensor<1x40x1xf32> + %517 = tosa.mul %507, %516 {shift = 0 : i8} : (tensor<1x40x4096xf32>, tensor<1x40x1xf32>) -> tensor<1x40x4096xf32> + %518 = tosa.reshape %arg39 {new_shape = array} : (tensor<4096xf32>) -> tensor<1x1x4096xf32> + %519 = tosa.mul %518, %517 {shift = 0 : i8} : (tensor<1x1x4096xf32>, tensor<1x40x4096xf32>) -> tensor<1x40x4096xf32> + %520 = "tosa.const"() <{value = dense<[1, 0]> : tensor<2xi32>}> : () -> tensor<2xi32> + %521 = tosa.transpose %arg40, %520 : (tensor<4096x4096xf32>, tensor<2xi32>) -> tensor<4096x4096xf32> + %522 = tosa.reshape %519 {new_shape = array} : (tensor<1x40x4096xf32>) -> tensor<40x4096xf32> + %cst_113 = arith.constant dense<0.000000e+00> : tensor<40x4096xf32> + %523 = linalg.matmul {cast = 
#linalg.type_fn} ins(%522, %521 : tensor<40x4096xf32>, tensor<4096x4096xf32>) outs(%cst_113 : tensor<40x4096xf32>) -> tensor<40x4096xf32> + %524 = tosa.reshape %523 {new_shape = array} : (tensor<40x4096xf32>) -> tensor<1x40x4096xf32> + %525 = "tosa.const"() <{value = dense<[1, 0]> : tensor<2xi32>}> : () -> tensor<2xi32> + %526 = tosa.transpose %arg41, %525 : (tensor<4096x4096xf32>, tensor<2xi32>) -> tensor<4096x4096xf32> + %527 = tosa.reshape %519 {new_shape = array} : (tensor<1x40x4096xf32>) -> tensor<40x4096xf32> + %cst_114 = arith.constant dense<0.000000e+00> : tensor<40x4096xf32> + %528 = linalg.matmul {cast = #linalg.type_fn} ins(%527, %526 : tensor<40x4096xf32>, tensor<4096x4096xf32>) outs(%cst_114 : tensor<40x4096xf32>) -> tensor<40x4096xf32> + %529 = tosa.reshape %528 {new_shape = array} : (tensor<40x4096xf32>) -> tensor<1x40x4096xf32> + %530 = "tosa.const"() <{value = dense<[1, 0]> : tensor<2xi32>}> : () -> tensor<2xi32> + %531 = tosa.transpose %arg42, %530 : (tensor<4096x4096xf32>, tensor<2xi32>) -> tensor<4096x4096xf32> + %532 = tosa.reshape %519 {new_shape = array} : (tensor<1x40x4096xf32>) -> tensor<40x4096xf32> + %cst_115 = arith.constant dense<0.000000e+00> : tensor<40x4096xf32> + %533 = linalg.matmul {cast = #linalg.type_fn} ins(%532, %531 : tensor<40x4096xf32>, tensor<4096x4096xf32>) outs(%cst_115 : tensor<40x4096xf32>) -> tensor<40x4096xf32> + %534 = tosa.reshape %533 {new_shape = array} : (tensor<40x4096xf32>) -> tensor<1x40x4096xf32> + %535 = tosa.reshape %524 {new_shape = array} : (tensor<1x40x4096xf32>) -> tensor<1x40x32x128xf32> + %536 = "tosa.const"() <{value = dense<[0, 2, 1, 3]> : tensor<4xi32>}> : () -> tensor<4xi32> + %537 = tosa.transpose %535, %536 : (tensor<1x40x32x128xf32>, tensor<4xi32>) -> tensor<1x32x40x128xf32> + %538 = tosa.reshape %529 {new_shape = array} : (tensor<1x40x4096xf32>) -> tensor<1x40x32x128xf32> + %539 = "tosa.const"() <{value = dense<[0, 2, 1, 3]> : tensor<4xi32>}> : () -> tensor<4xi32> + %540 = tosa.transpose 
%538, %539 : (tensor<1x40x32x128xf32>, tensor<4xi32>) -> tensor<1x32x40x128xf32> + %541 = tosa.reshape %534 {new_shape = array} : (tensor<1x40x4096xf32>) -> tensor<1x40x32x128xf32> + %542 = "tosa.const"() <{value = dense<[0, 2, 1, 3]> : tensor<4xi32>}> : () -> tensor<4xi32> + %543 = tosa.transpose %541, %542 : (tensor<1x40x32x128xf32>, tensor<4xi32>) -> tensor<1x32x40x128xf32> + %544 = tosa.reshape %45 {new_shape = array} : (tensor<1x40x128xf32>) -> tensor<1x1x40x128xf32> + %545 = tosa.reshape %47 {new_shape = array} : (tensor<1x40x128xf32>) -> tensor<1x1x40x128xf32> + %546 = tosa.mul %537, %544 {shift = 0 : i8} : (tensor<1x32x40x128xf32>, tensor<1x1x40x128xf32>) -> tensor<1x32x40x128xf32> + %extracted_slice_116 = tensor.extract_slice %537[0, 0, 0, 0] [1, 32, 40, 64] [1, 1, 1, 1] : tensor<1x32x40x128xf32> to tensor<1x32x40x64xf32> + %extracted_slice_117 = tensor.extract_slice %537[0, 0, 0, 64] [1, 32, 40, 64] [1, 1, 1, 1] : tensor<1x32x40x128xf32> to tensor<1x32x40x64xf32> + %547 = tensor.empty() : tensor<1x32x40x64xf32> + %548 = linalg.negf ins(%extracted_slice_117 : tensor<1x32x40x64xf32>) outs(%547 : tensor<1x32x40x64xf32>) -> tensor<1x32x40x64xf32> + %549 = tensor.empty() : tensor<1x32x40x128xf32> + %inserted_slice_118 = tensor.insert_slice %548 into %549[0, 0, 0, 0] [1, 32, 40, 64] [1, 1, 1, 1] : tensor<1x32x40x64xf32> into tensor<1x32x40x128xf32> + %inserted_slice_119 = tensor.insert_slice %extracted_slice_116 into %inserted_slice_118[0, 0, 0, 64] [1, 32, 40, 64] [1, 1, 1, 1] : tensor<1x32x40x64xf32> into tensor<1x32x40x128xf32> + %550 = tosa.mul %inserted_slice_119, %545 {shift = 0 : i8} : (tensor<1x32x40x128xf32>, tensor<1x1x40x128xf32>) -> tensor<1x32x40x128xf32> + %551 = tosa.add %546, %550 : (tensor<1x32x40x128xf32>, tensor<1x32x40x128xf32>) -> tensor<1x32x40x128xf32> + %552 = tosa.mul %540, %544 {shift = 0 : i8} : (tensor<1x32x40x128xf32>, tensor<1x1x40x128xf32>) -> tensor<1x32x40x128xf32> + %extracted_slice_120 = tensor.extract_slice %540[0, 0, 0, 0] 
[1, 32, 40, 64] [1, 1, 1, 1] : tensor<1x32x40x128xf32> to tensor<1x32x40x64xf32> + %extracted_slice_121 = tensor.extract_slice %540[0, 0, 0, 64] [1, 32, 40, 64] [1, 1, 1, 1] : tensor<1x32x40x128xf32> to tensor<1x32x40x64xf32> + %553 = tensor.empty() : tensor<1x32x40x64xf32> + %554 = linalg.negf ins(%extracted_slice_121 : tensor<1x32x40x64xf32>) outs(%553 : tensor<1x32x40x64xf32>) -> tensor<1x32x40x64xf32> + %555 = tensor.empty() : tensor<1x32x40x128xf32> + %inserted_slice_122 = tensor.insert_slice %554 into %555[0, 0, 0, 0] [1, 32, 40, 64] [1, 1, 1, 1] : tensor<1x32x40x64xf32> into tensor<1x32x40x128xf32> + %inserted_slice_123 = tensor.insert_slice %extracted_slice_120 into %inserted_slice_122[0, 0, 0, 64] [1, 32, 40, 64] [1, 1, 1, 1] : tensor<1x32x40x64xf32> into tensor<1x32x40x128xf32> + %556 = tosa.mul %inserted_slice_123, %545 {shift = 0 : i8} : (tensor<1x32x40x128xf32>, tensor<1x1x40x128xf32>) -> tensor<1x32x40x128xf32> + %557 = tosa.add %552, %556 : (tensor<1x32x40x128xf32>, tensor<1x32x40x128xf32>) -> tensor<1x32x40x128xf32> + %558 = tosa.reshape %19 {new_shape = array} : (tensor<40x41xf32>) -> tensor<1x40x41xf32> + %559 = tosa.reshape %558 {new_shape = array} : (tensor<1x40x41xf32>) -> tensor<1x1x40x41xf32> + %extracted_slice_124 = tensor.extract_slice %559[0, 0, 0, 0] [1, 1, 40, 41] [1, 1, 1, 1] : tensor<1x1x40x41xf32> to tensor<1x1x40x41xf32> + %extracted_slice_125 = tensor.extract_slice %extracted_slice_124[0, 0, 0, 0] [1, 1, 40, 41] [1, 1, 1, 1] : tensor<1x1x40x41xf32> to tensor<1x1x40x41xf32> + %560 = "tosa.const"() <{value = dense<0.000000e+00> : tensor<1x1x40x41xf32>}> : () -> tensor<1x1x40x41xf32> + %561 = tosa.add %extracted_slice_125, %560 : (tensor<1x1x40x41xf32>, tensor<1x1x40x41xf32>) -> tensor<1x1x40x41xf32> + %extracted_slice_126 = tensor.extract_slice %561[0, 0, 0, 0] [1, 1, 40, 41] [1, 1, 1, 1] : tensor<1x1x40x41xf32> to tensor<1x1x40x41xf32> + %extracted_slice_127 = tensor.extract_slice %extracted_slice_126[0, 0, 0, 0] [1, 1, 40, 41] [1, 
1, 1, 1] : tensor<1x1x40x41xf32> to tensor<1x1x40x41xf32> + %extracted_slice_128 = tensor.extract_slice %extracted_slice_127[0, 0, 0, 0] [1, 1, 40, 41] [1, 1, 1, 1] : tensor<1x1x40x41xf32> to tensor<1x1x40x41xf32> + %extracted_slice_129 = tensor.extract_slice %extracted_slice_128[0, 0, 0, 0] [1, 1, 40, 40] [1, 1, 1, 1] : tensor<1x1x40x41xf32> to tensor<1x1x40x40xf32> + %cst_130 = arith.constant 0.000000e+00 : f32 + %splat_131 = tensor.splat %cst_130 : tensor<40x40xf32> + %562 = tosa.reshape %extracted_slice_129 {new_shape = array} : (tensor<1x1x40x40xf32>) -> tensor<40x40xf32> + %563 = tosa.add %splat_131, %562 : (tensor<40x40xf32>, tensor<40x40xf32>) -> tensor<40x40xf32> + %564 = "tosa.const"() <{value = dense<[0, 1, 3, 2]> : tensor<4xi32>}> : () -> tensor<4xi32> + %565 = tosa.transpose %557, %564 : (tensor<1x32x40x128xf32>, tensor<4xi32>) -> tensor<1x32x128x40xf32> + %566 = tosa.reshape %551 {new_shape = array} : (tensor<1x32x40x128xf32>) -> tensor<32x40x128xf32> + %567 = tosa.reshape %565 {new_shape = array} : (tensor<1x32x128x40xf32>) -> tensor<32x128x40xf32> + %568 = tosa.matmul %566, %567 : (tensor<32x40x128xf32>, tensor<32x128x40xf32>) -> tensor<32x40x40xf32> + %cst_132 = arith.constant 0.0883883461 : f32 + %splat_133 = tensor.splat %cst_132 : tensor<32x40x40xf32> + %569 = tosa.mul %568, %splat_133 {shift = 0 : i8} : (tensor<32x40x40xf32>, tensor<32x40x40xf32>) -> tensor<32x40x40xf32> + %570 = tosa.add %569, %563 : (tensor<32x40x40xf32>, tensor<40x40xf32>) -> tensor<32x40x40xf32> + %571 = tosa.reduce_max %570 {axis = 2 : i32} : (tensor<32x40x40xf32>) -> tensor<32x40x1xf32> + %572 = tosa.sub %570, %571 : (tensor<32x40x40xf32>, tensor<32x40x1xf32>) -> tensor<32x40x40xf32> + %573 = math.exp %572 : tensor<32x40x40xf32> + %574 = tosa.reduce_sum %573 {axis = 2 : i32} : (tensor<32x40x40xf32>) -> tensor<32x40x1xf32> + %575 = tosa.log %574 : (tensor<32x40x1xf32>) -> tensor<32x40x1xf32> + %576 = tosa.add %571, %575 : (tensor<32x40x1xf32>, tensor<32x40x1xf32>) -> 
tensor<32x40x1xf32> + %577 = tosa.sub %570, %576 : (tensor<32x40x40xf32>, tensor<32x40x1xf32>) -> tensor<32x40x40xf32> + %578 = math.exp %577 : tensor<32x40x40xf32> + %579 = tosa.reshape %576 {new_shape = array} : (tensor<32x40x1xf32>) -> tensor<1x32x40xf32> + %580 = tosa.reshape %543 {new_shape = array} : (tensor<1x32x40x128xf32>) -> tensor<32x40x128xf32> + %581 = tosa.matmul %578, %580 : (tensor<32x40x40xf32>, tensor<32x40x128xf32>) -> tensor<32x40x128xf32> + %582 = tosa.reshape %581 {new_shape = array} : (tensor<32x40x128xf32>) -> tensor<1x32x40x128xf32> + %583 = "tosa.const"() <{value = dense<[0, 2, 1, 3]> : tensor<4xi32>}> : () -> tensor<4xi32> + %584 = tosa.transpose %582, %583 : (tensor<1x32x40x128xf32>, tensor<4xi32>) -> tensor<1x40x32x128xf32> + %585 = tosa.reshape %584 {new_shape = array} : (tensor<1x40x32x128xf32>) -> tensor<1x40x4096xf32> + %586 = "tosa.const"() <{value = dense<[1, 0]> : tensor<2xi32>}> : () -> tensor<2xi32> + %587 = tosa.transpose %arg43, %586 : (tensor<4096x4096xf32>, tensor<2xi32>) -> tensor<4096x4096xf32> + %588 = tosa.reshape %585 {new_shape = array} : (tensor<1x40x4096xf32>) -> tensor<40x4096xf32> + %cst_134 = arith.constant dense<0.000000e+00> : tensor<40x4096xf32> + %589 = linalg.matmul {cast = #linalg.type_fn} ins(%588, %587 : tensor<40x4096xf32>, tensor<4096x4096xf32>) outs(%cst_134 : tensor<40x4096xf32>) -> tensor<40x4096xf32> + %590 = tosa.reshape %589 {new_shape = array} : (tensor<40x4096xf32>) -> tensor<1x40x4096xf32> + %591 = tosa.add %507, %590 : (tensor<1x40x4096xf32>, tensor<1x40x4096xf32>) -> tensor<1x40x4096xf32> + %592 = tensor.empty() : tensor<1x40x4096xf32> + %c2_i32_135 = arith.constant 2 : i32 + %593 = linalg.generic {indexing_maps = [#map4, #map4], iterator_types = ["parallel", "parallel", "parallel"]} ins(%591 : tensor<1x40x4096xf32>) outs(%592 : tensor<1x40x4096xf32>) { + ^bb0(%in: f32, %out: f32): + %3745 = math.fpowi %in, %c2_i32_135 : f32, i32 + linalg.yield %3745 : f32 + } -> tensor<1x40x4096xf32> + %594 
= tosa.reduce_sum %593 {axis = 2 : i32} : (tensor<1x40x4096xf32>) -> tensor<1x40x1xf32> + %595 = "tosa.const"() <{value = dense<4.096000e+03> : tensor<1xf32>}> : () -> tensor<1xf32> + %596 = tosa.reciprocal %595 : (tensor<1xf32>) -> tensor<1xf32> + %597 = tosa.mul %596, %594 {shift = 0 : i8} : (tensor<1xf32>, tensor<1x40x1xf32>) -> tensor<1x40x1xf32> + %598 = "tosa.const"() <{value = dense<9.99999974E-6> : tensor<1x40x1xf32>}> : () -> tensor<1x40x1xf32> + %599 = tosa.add %597, %598 : (tensor<1x40x1xf32>, tensor<1x40x1xf32>) -> tensor<1x40x1xf32> + %600 = tosa.rsqrt %599 : (tensor<1x40x1xf32>) -> tensor<1x40x1xf32> + %601 = tosa.mul %591, %600 {shift = 0 : i8} : (tensor<1x40x4096xf32>, tensor<1x40x1xf32>) -> tensor<1x40x4096xf32> + %602 = tosa.reshape %arg44 {new_shape = array} : (tensor<4096xf32>) -> tensor<1x1x4096xf32> + %603 = tosa.mul %602, %601 {shift = 0 : i8} : (tensor<1x1x4096xf32>, tensor<1x40x4096xf32>) -> tensor<1x40x4096xf32> + %604 = "tosa.const"() <{value = dense<[1, 0]> : tensor<2xi32>}> : () -> tensor<2xi32> + %605 = tosa.transpose %arg45, %604 : (tensor<11008x4096xf32>, tensor<2xi32>) -> tensor<4096x11008xf32> + %606 = tosa.reshape %603 {new_shape = array} : (tensor<1x40x4096xf32>) -> tensor<40x4096xf32> + %cst_136 = arith.constant dense<0.000000e+00> : tensor<40x11008xf32> + %607 = linalg.matmul {cast = #linalg.type_fn} ins(%606, %605 : tensor<40x4096xf32>, tensor<4096x11008xf32>) outs(%cst_136 : tensor<40x11008xf32>) -> tensor<40x11008xf32> + %608 = tosa.reshape %607 {new_shape = array} : (tensor<40x11008xf32>) -> tensor<1x40x11008xf32> + %609 = tosa.sigmoid %608 : (tensor<1x40x11008xf32>) -> tensor<1x40x11008xf32> + %610 = tosa.mul %608, %609 {shift = 0 : i8} : (tensor<1x40x11008xf32>, tensor<1x40x11008xf32>) -> tensor<1x40x11008xf32> + %611 = "tosa.const"() <{value = dense<[1, 0]> : tensor<2xi32>}> : () -> tensor<2xi32> + %612 = tosa.transpose %arg46, %611 : (tensor<11008x4096xf32>, tensor<2xi32>) -> tensor<4096x11008xf32> + %613 = tosa.reshape 
%603 {new_shape = array} : (tensor<1x40x4096xf32>) -> tensor<40x4096xf32> + %cst_137 = arith.constant dense<0.000000e+00> : tensor<40x11008xf32> + %614 = linalg.matmul {cast = #linalg.type_fn} ins(%613, %612 : tensor<40x4096xf32>, tensor<4096x11008xf32>) outs(%cst_137 : tensor<40x11008xf32>) -> tensor<40x11008xf32> + %615 = tosa.reshape %614 {new_shape = array} : (tensor<40x11008xf32>) -> tensor<1x40x11008xf32> + %616 = tosa.mul %610, %615 {shift = 0 : i8} : (tensor<1x40x11008xf32>, tensor<1x40x11008xf32>) -> tensor<1x40x11008xf32> + %617 = "tosa.const"() <{value = dense<[1, 0]> : tensor<2xi32>}> : () -> tensor<2xi32> + %618 = tosa.transpose %arg47, %617 : (tensor<4096x11008xf32>, tensor<2xi32>) -> tensor<11008x4096xf32> + %619 = tosa.reshape %616 {new_shape = array} : (tensor<1x40x11008xf32>) -> tensor<40x11008xf32> + %cst_138 = arith.constant dense<0.000000e+00> : tensor<40x4096xf32> + %620 = linalg.matmul {cast = #linalg.type_fn} ins(%619, %618 : tensor<40x11008xf32>, tensor<11008x4096xf32>) outs(%cst_138 : tensor<40x4096xf32>) -> tensor<40x4096xf32> + %621 = tosa.reshape %620 {new_shape = array} : (tensor<40x4096xf32>) -> tensor<1x40x4096xf32> + %622 = tosa.add %591, %621 : (tensor<1x40x4096xf32>, tensor<1x40x4096xf32>) -> tensor<1x40x4096xf32> + %623 = tensor.empty() : tensor<1x40x4096xf32> + %c2_i32_139 = arith.constant 2 : i32 + %624 = linalg.generic {indexing_maps = [#map4, #map4], iterator_types = ["parallel", "parallel", "parallel"]} ins(%622 : tensor<1x40x4096xf32>) outs(%623 : tensor<1x40x4096xf32>) { + ^bb0(%in: f32, %out: f32): + %3745 = math.fpowi %in, %c2_i32_139 : f32, i32 + linalg.yield %3745 : f32 + } -> tensor<1x40x4096xf32> + %625 = tosa.reduce_sum %624 {axis = 2 : i32} : (tensor<1x40x4096xf32>) -> tensor<1x40x1xf32> + %626 = "tosa.const"() <{value = dense<4.096000e+03> : tensor<1xf32>}> : () -> tensor<1xf32> + %627 = tosa.reciprocal %626 : (tensor<1xf32>) -> tensor<1xf32> + %628 = tosa.mul %627, %625 {shift = 0 : i8} : (tensor<1xf32>, 
tensor<1x40x1xf32>) -> tensor<1x40x1xf32> + %629 = "tosa.const"() <{value = dense<9.99999974E-6> : tensor<1x40x1xf32>}> : () -> tensor<1x40x1xf32> + %630 = tosa.add %628, %629 : (tensor<1x40x1xf32>, tensor<1x40x1xf32>) -> tensor<1x40x1xf32> + %631 = tosa.rsqrt %630 : (tensor<1x40x1xf32>) -> tensor<1x40x1xf32> + %632 = tosa.mul %622, %631 {shift = 0 : i8} : (tensor<1x40x4096xf32>, tensor<1x40x1xf32>) -> tensor<1x40x4096xf32> + %633 = tosa.reshape %arg48 {new_shape = array} : (tensor<4096xf32>) -> tensor<1x1x4096xf32> + %634 = tosa.mul %633, %632 {shift = 0 : i8} : (tensor<1x1x4096xf32>, tensor<1x40x4096xf32>) -> tensor<1x40x4096xf32> + %635 = "tosa.const"() <{value = dense<[1, 0]> : tensor<2xi32>}> : () -> tensor<2xi32> + %636 = tosa.transpose %arg49, %635 : (tensor<4096x4096xf32>, tensor<2xi32>) -> tensor<4096x4096xf32> + %637 = tosa.reshape %634 {new_shape = array} : (tensor<1x40x4096xf32>) -> tensor<40x4096xf32> + %cst_140 = arith.constant dense<0.000000e+00> : tensor<40x4096xf32> + %638 = linalg.matmul {cast = #linalg.type_fn} ins(%637, %636 : tensor<40x4096xf32>, tensor<4096x4096xf32>) outs(%cst_140 : tensor<40x4096xf32>) -> tensor<40x4096xf32> + %639 = tosa.reshape %638 {new_shape = array} : (tensor<40x4096xf32>) -> tensor<1x40x4096xf32> + %640 = "tosa.const"() <{value = dense<[1, 0]> : tensor<2xi32>}> : () -> tensor<2xi32> + %641 = tosa.transpose %arg50, %640 : (tensor<4096x4096xf32>, tensor<2xi32>) -> tensor<4096x4096xf32> + %642 = tosa.reshape %634 {new_shape = array} : (tensor<1x40x4096xf32>) -> tensor<40x4096xf32> + %cst_141 = arith.constant dense<0.000000e+00> : tensor<40x4096xf32> + %643 = linalg.matmul {cast = #linalg.type_fn} ins(%642, %641 : tensor<40x4096xf32>, tensor<4096x4096xf32>) outs(%cst_141 : tensor<40x4096xf32>) -> tensor<40x4096xf32> + %644 = tosa.reshape %643 {new_shape = array} : (tensor<40x4096xf32>) -> tensor<1x40x4096xf32> + %645 = "tosa.const"() <{value = dense<[1, 0]> : tensor<2xi32>}> : () -> tensor<2xi32> + %646 = tosa.transpose 
%arg51, %645 : (tensor<4096x4096xf32>, tensor<2xi32>) -> tensor<4096x4096xf32> + %647 = tosa.reshape %634 {new_shape = array} : (tensor<1x40x4096xf32>) -> tensor<40x4096xf32> + %cst_142 = arith.constant dense<0.000000e+00> : tensor<40x4096xf32> + %648 = linalg.matmul {cast = #linalg.type_fn} ins(%647, %646 : tensor<40x4096xf32>, tensor<4096x4096xf32>) outs(%cst_142 : tensor<40x4096xf32>) -> tensor<40x4096xf32> + %649 = tosa.reshape %648 {new_shape = array} : (tensor<40x4096xf32>) -> tensor<1x40x4096xf32> + %650 = tosa.reshape %639 {new_shape = array} : (tensor<1x40x4096xf32>) -> tensor<1x40x32x128xf32> + %651 = "tosa.const"() <{value = dense<[0, 2, 1, 3]> : tensor<4xi32>}> : () -> tensor<4xi32> + %652 = tosa.transpose %650, %651 : (tensor<1x40x32x128xf32>, tensor<4xi32>) -> tensor<1x32x40x128xf32> + %653 = tosa.reshape %644 {new_shape = array} : (tensor<1x40x4096xf32>) -> tensor<1x40x32x128xf32> + %654 = "tosa.const"() <{value = dense<[0, 2, 1, 3]> : tensor<4xi32>}> : () -> tensor<4xi32> + %655 = tosa.transpose %653, %654 : (tensor<1x40x32x128xf32>, tensor<4xi32>) -> tensor<1x32x40x128xf32> + %656 = tosa.reshape %649 {new_shape = array} : (tensor<1x40x4096xf32>) -> tensor<1x40x32x128xf32> + %657 = "tosa.const"() <{value = dense<[0, 2, 1, 3]> : tensor<4xi32>}> : () -> tensor<4xi32> + %658 = tosa.transpose %656, %657 : (tensor<1x40x32x128xf32>, tensor<4xi32>) -> tensor<1x32x40x128xf32> + %659 = tosa.reshape %45 {new_shape = array} : (tensor<1x40x128xf32>) -> tensor<1x1x40x128xf32> + %660 = tosa.reshape %47 {new_shape = array} : (tensor<1x40x128xf32>) -> tensor<1x1x40x128xf32> + %661 = tosa.mul %652, %659 {shift = 0 : i8} : (tensor<1x32x40x128xf32>, tensor<1x1x40x128xf32>) -> tensor<1x32x40x128xf32> + %extracted_slice_143 = tensor.extract_slice %652[0, 0, 0, 0] [1, 32, 40, 64] [1, 1, 1, 1] : tensor<1x32x40x128xf32> to tensor<1x32x40x64xf32> + %extracted_slice_144 = tensor.extract_slice %652[0, 0, 0, 64] [1, 32, 40, 64] [1, 1, 1, 1] : tensor<1x32x40x128xf32> to 
tensor<1x32x40x64xf32> + %662 = tensor.empty() : tensor<1x32x40x64xf32> + %663 = linalg.negf ins(%extracted_slice_144 : tensor<1x32x40x64xf32>) outs(%662 : tensor<1x32x40x64xf32>) -> tensor<1x32x40x64xf32> + %664 = tensor.empty() : tensor<1x32x40x128xf32> + %inserted_slice_145 = tensor.insert_slice %663 into %664[0, 0, 0, 0] [1, 32, 40, 64] [1, 1, 1, 1] : tensor<1x32x40x64xf32> into tensor<1x32x40x128xf32> + %inserted_slice_146 = tensor.insert_slice %extracted_slice_143 into %inserted_slice_145[0, 0, 0, 64] [1, 32, 40, 64] [1, 1, 1, 1] : tensor<1x32x40x64xf32> into tensor<1x32x40x128xf32> + %665 = tosa.mul %inserted_slice_146, %660 {shift = 0 : i8} : (tensor<1x32x40x128xf32>, tensor<1x1x40x128xf32>) -> tensor<1x32x40x128xf32> + %666 = tosa.add %661, %665 : (tensor<1x32x40x128xf32>, tensor<1x32x40x128xf32>) -> tensor<1x32x40x128xf32> + %667 = tosa.mul %655, %659 {shift = 0 : i8} : (tensor<1x32x40x128xf32>, tensor<1x1x40x128xf32>) -> tensor<1x32x40x128xf32> + %extracted_slice_147 = tensor.extract_slice %655[0, 0, 0, 0] [1, 32, 40, 64] [1, 1, 1, 1] : tensor<1x32x40x128xf32> to tensor<1x32x40x64xf32> + %extracted_slice_148 = tensor.extract_slice %655[0, 0, 0, 64] [1, 32, 40, 64] [1, 1, 1, 1] : tensor<1x32x40x128xf32> to tensor<1x32x40x64xf32> + %668 = tensor.empty() : tensor<1x32x40x64xf32> + %669 = linalg.negf ins(%extracted_slice_148 : tensor<1x32x40x64xf32>) outs(%668 : tensor<1x32x40x64xf32>) -> tensor<1x32x40x64xf32> + %670 = tensor.empty() : tensor<1x32x40x128xf32> + %inserted_slice_149 = tensor.insert_slice %669 into %670[0, 0, 0, 0] [1, 32, 40, 64] [1, 1, 1, 1] : tensor<1x32x40x64xf32> into tensor<1x32x40x128xf32> + %inserted_slice_150 = tensor.insert_slice %extracted_slice_147 into %inserted_slice_149[0, 0, 0, 64] [1, 32, 40, 64] [1, 1, 1, 1] : tensor<1x32x40x64xf32> into tensor<1x32x40x128xf32> + %671 = tosa.mul %inserted_slice_150, %660 {shift = 0 : i8} : (tensor<1x32x40x128xf32>, tensor<1x1x40x128xf32>) -> tensor<1x32x40x128xf32> + %672 = tosa.add %667, 
%671 : (tensor<1x32x40x128xf32>, tensor<1x32x40x128xf32>) -> tensor<1x32x40x128xf32> + %673 = tosa.reshape %19 {new_shape = array} : (tensor<40x41xf32>) -> tensor<1x40x41xf32> + %674 = tosa.reshape %673 {new_shape = array} : (tensor<1x40x41xf32>) -> tensor<1x1x40x41xf32> + %extracted_slice_151 = tensor.extract_slice %674[0, 0, 0, 0] [1, 1, 40, 41] [1, 1, 1, 1] : tensor<1x1x40x41xf32> to tensor<1x1x40x41xf32> + %extracted_slice_152 = tensor.extract_slice %extracted_slice_151[0, 0, 0, 0] [1, 1, 40, 41] [1, 1, 1, 1] : tensor<1x1x40x41xf32> to tensor<1x1x40x41xf32> + %675 = "tosa.const"() <{value = dense<0.000000e+00> : tensor<1x1x40x41xf32>}> : () -> tensor<1x1x40x41xf32> + %676 = tosa.add %extracted_slice_152, %675 : (tensor<1x1x40x41xf32>, tensor<1x1x40x41xf32>) -> tensor<1x1x40x41xf32> + %extracted_slice_153 = tensor.extract_slice %676[0, 0, 0, 0] [1, 1, 40, 41] [1, 1, 1, 1] : tensor<1x1x40x41xf32> to tensor<1x1x40x41xf32> + %extracted_slice_154 = tensor.extract_slice %extracted_slice_153[0, 0, 0, 0] [1, 1, 40, 41] [1, 1, 1, 1] : tensor<1x1x40x41xf32> to tensor<1x1x40x41xf32> + %extracted_slice_155 = tensor.extract_slice %extracted_slice_154[0, 0, 0, 0] [1, 1, 40, 41] [1, 1, 1, 1] : tensor<1x1x40x41xf32> to tensor<1x1x40x41xf32> + %extracted_slice_156 = tensor.extract_slice %extracted_slice_155[0, 0, 0, 0] [1, 1, 40, 40] [1, 1, 1, 1] : tensor<1x1x40x41xf32> to tensor<1x1x40x40xf32> + %cst_157 = arith.constant 0.000000e+00 : f32 + %splat_158 = tensor.splat %cst_157 : tensor<40x40xf32> + %677 = tosa.reshape %extracted_slice_156 {new_shape = array} : (tensor<1x1x40x40xf32>) -> tensor<40x40xf32> + %678 = tosa.add %splat_158, %677 : (tensor<40x40xf32>, tensor<40x40xf32>) -> tensor<40x40xf32> + %679 = "tosa.const"() <{value = dense<[0, 1, 3, 2]> : tensor<4xi32>}> : () -> tensor<4xi32> + %680 = tosa.transpose %672, %679 : (tensor<1x32x40x128xf32>, tensor<4xi32>) -> tensor<1x32x128x40xf32> + %681 = tosa.reshape %666 {new_shape = array} : (tensor<1x32x40x128xf32>) -> 
tensor<32x40x128xf32> + %682 = tosa.reshape %680 {new_shape = array} : (tensor<1x32x128x40xf32>) -> tensor<32x128x40xf32> + %683 = tosa.matmul %681, %682 : (tensor<32x40x128xf32>, tensor<32x128x40xf32>) -> tensor<32x40x40xf32> + %cst_159 = arith.constant 0.0883883461 : f32 + %splat_160 = tensor.splat %cst_159 : tensor<32x40x40xf32> + %684 = tosa.mul %683, %splat_160 {shift = 0 : i8} : (tensor<32x40x40xf32>, tensor<32x40x40xf32>) -> tensor<32x40x40xf32> + %685 = tosa.add %684, %678 : (tensor<32x40x40xf32>, tensor<40x40xf32>) -> tensor<32x40x40xf32> + %686 = tosa.reduce_max %685 {axis = 2 : i32} : (tensor<32x40x40xf32>) -> tensor<32x40x1xf32> + %687 = tosa.sub %685, %686 : (tensor<32x40x40xf32>, tensor<32x40x1xf32>) -> tensor<32x40x40xf32> + %688 = math.exp %687 : tensor<32x40x40xf32> + %689 = tosa.reduce_sum %688 {axis = 2 : i32} : (tensor<32x40x40xf32>) -> tensor<32x40x1xf32> + %690 = tosa.log %689 : (tensor<32x40x1xf32>) -> tensor<32x40x1xf32> + %691 = tosa.add %686, %690 : (tensor<32x40x1xf32>, tensor<32x40x1xf32>) -> tensor<32x40x1xf32> + %692 = tosa.sub %685, %691 : (tensor<32x40x40xf32>, tensor<32x40x1xf32>) -> tensor<32x40x40xf32> + %693 = math.exp %692 : tensor<32x40x40xf32> + %694 = tosa.reshape %691 {new_shape = array} : (tensor<32x40x1xf32>) -> tensor<1x32x40xf32> + %695 = tosa.reshape %658 {new_shape = array} : (tensor<1x32x40x128xf32>) -> tensor<32x40x128xf32> + %696 = tosa.matmul %693, %695 : (tensor<32x40x40xf32>, tensor<32x40x128xf32>) -> tensor<32x40x128xf32> + %697 = tosa.reshape %696 {new_shape = array} : (tensor<32x40x128xf32>) -> tensor<1x32x40x128xf32> + %698 = "tosa.const"() <{value = dense<[0, 2, 1, 3]> : tensor<4xi32>}> : () -> tensor<4xi32> + %699 = tosa.transpose %697, %698 : (tensor<1x32x40x128xf32>, tensor<4xi32>) -> tensor<1x40x32x128xf32> + %700 = tosa.reshape %699 {new_shape = array} : (tensor<1x40x32x128xf32>) -> tensor<1x40x4096xf32> + %701 = "tosa.const"() <{value = dense<[1, 0]> : tensor<2xi32>}> : () -> tensor<2xi32> + %702 = 
tosa.transpose %arg52, %701 : (tensor<4096x4096xf32>, tensor<2xi32>) -> tensor<4096x4096xf32> + %703 = tosa.reshape %700 {new_shape = array} : (tensor<1x40x4096xf32>) -> tensor<40x4096xf32> + %cst_161 = arith.constant dense<0.000000e+00> : tensor<40x4096xf32> + %704 = linalg.matmul {cast = #linalg.type_fn} ins(%703, %702 : tensor<40x4096xf32>, tensor<4096x4096xf32>) outs(%cst_161 : tensor<40x4096xf32>) -> tensor<40x4096xf32> + %705 = tosa.reshape %704 {new_shape = array} : (tensor<40x4096xf32>) -> tensor<1x40x4096xf32> + %706 = tosa.add %622, %705 : (tensor<1x40x4096xf32>, tensor<1x40x4096xf32>) -> tensor<1x40x4096xf32> + %707 = tensor.empty() : tensor<1x40x4096xf32> + %c2_i32_162 = arith.constant 2 : i32 + %708 = linalg.generic {indexing_maps = [#map4, #map4], iterator_types = ["parallel", "parallel", "parallel"]} ins(%706 : tensor<1x40x4096xf32>) outs(%707 : tensor<1x40x4096xf32>) { + ^bb0(%in: f32, %out: f32): + %3745 = math.fpowi %in, %c2_i32_162 : f32, i32 + linalg.yield %3745 : f32 + } -> tensor<1x40x4096xf32> + %709 = tosa.reduce_sum %708 {axis = 2 : i32} : (tensor<1x40x4096xf32>) -> tensor<1x40x1xf32> + %710 = "tosa.const"() <{value = dense<4.096000e+03> : tensor<1xf32>}> : () -> tensor<1xf32> + %711 = tosa.reciprocal %710 : (tensor<1xf32>) -> tensor<1xf32> + %712 = tosa.mul %711, %709 {shift = 0 : i8} : (tensor<1xf32>, tensor<1x40x1xf32>) -> tensor<1x40x1xf32> + %713 = "tosa.const"() <{value = dense<9.99999974E-6> : tensor<1x40x1xf32>}> : () -> tensor<1x40x1xf32> + %714 = tosa.add %712, %713 : (tensor<1x40x1xf32>, tensor<1x40x1xf32>) -> tensor<1x40x1xf32> + %715 = tosa.rsqrt %714 : (tensor<1x40x1xf32>) -> tensor<1x40x1xf32> + %716 = tosa.mul %706, %715 {shift = 0 : i8} : (tensor<1x40x4096xf32>, tensor<1x40x1xf32>) -> tensor<1x40x4096xf32> + %717 = tosa.reshape %arg53 {new_shape = array} : (tensor<4096xf32>) -> tensor<1x1x4096xf32> + %718 = tosa.mul %717, %716 {shift = 0 : i8} : (tensor<1x1x4096xf32>, tensor<1x40x4096xf32>) -> tensor<1x40x4096xf32> + %719 = 
"tosa.const"() <{value = dense<[1, 0]> : tensor<2xi32>}> : () -> tensor<2xi32> + %720 = tosa.transpose %arg54, %719 : (tensor<11008x4096xf32>, tensor<2xi32>) -> tensor<4096x11008xf32> + %721 = tosa.reshape %718 {new_shape = array} : (tensor<1x40x4096xf32>) -> tensor<40x4096xf32> + %cst_163 = arith.constant dense<0.000000e+00> : tensor<40x11008xf32> + %722 = linalg.matmul {cast = #linalg.type_fn} ins(%721, %720 : tensor<40x4096xf32>, tensor<4096x11008xf32>) outs(%cst_163 : tensor<40x11008xf32>) -> tensor<40x11008xf32> + %723 = tosa.reshape %722 {new_shape = array} : (tensor<40x11008xf32>) -> tensor<1x40x11008xf32> + %724 = tosa.sigmoid %723 : (tensor<1x40x11008xf32>) -> tensor<1x40x11008xf32> + %725 = tosa.mul %723, %724 {shift = 0 : i8} : (tensor<1x40x11008xf32>, tensor<1x40x11008xf32>) -> tensor<1x40x11008xf32> + %726 = "tosa.const"() <{value = dense<[1, 0]> : tensor<2xi32>}> : () -> tensor<2xi32> + %727 = tosa.transpose %arg55, %726 : (tensor<11008x4096xf32>, tensor<2xi32>) -> tensor<4096x11008xf32> + %728 = tosa.reshape %718 {new_shape = array} : (tensor<1x40x4096xf32>) -> tensor<40x4096xf32> + %cst_164 = arith.constant dense<0.000000e+00> : tensor<40x11008xf32> + %729 = linalg.matmul {cast = #linalg.type_fn} ins(%728, %727 : tensor<40x4096xf32>, tensor<4096x11008xf32>) outs(%cst_164 : tensor<40x11008xf32>) -> tensor<40x11008xf32> + %730 = tosa.reshape %729 {new_shape = array} : (tensor<40x11008xf32>) -> tensor<1x40x11008xf32> + %731 = tosa.mul %725, %730 {shift = 0 : i8} : (tensor<1x40x11008xf32>, tensor<1x40x11008xf32>) -> tensor<1x40x11008xf32> + %732 = "tosa.const"() <{value = dense<[1, 0]> : tensor<2xi32>}> : () -> tensor<2xi32> + %733 = tosa.transpose %arg56, %732 : (tensor<4096x11008xf32>, tensor<2xi32>) -> tensor<11008x4096xf32> + %734 = tosa.reshape %731 {new_shape = array} : (tensor<1x40x11008xf32>) -> tensor<40x11008xf32> + %cst_165 = arith.constant dense<0.000000e+00> : tensor<40x4096xf32> + %735 = linalg.matmul {cast = #linalg.type_fn} ins(%734, 
%733 : tensor<40x11008xf32>, tensor<11008x4096xf32>) outs(%cst_165 : tensor<40x4096xf32>) -> tensor<40x4096xf32> + %736 = tosa.reshape %735 {new_shape = array} : (tensor<40x4096xf32>) -> tensor<1x40x4096xf32> + %737 = tosa.add %706, %736 : (tensor<1x40x4096xf32>, tensor<1x40x4096xf32>) -> tensor<1x40x4096xf32> + %738 = tensor.empty() : tensor<1x40x4096xf32> + %c2_i32_166 = arith.constant 2 : i32 + %739 = linalg.generic {indexing_maps = [#map4, #map4], iterator_types = ["parallel", "parallel", "parallel"]} ins(%737 : tensor<1x40x4096xf32>) outs(%738 : tensor<1x40x4096xf32>) { + ^bb0(%in: f32, %out: f32): + %3745 = math.fpowi %in, %c2_i32_166 : f32, i32 + linalg.yield %3745 : f32 + } -> tensor<1x40x4096xf32> + %740 = tosa.reduce_sum %739 {axis = 2 : i32} : (tensor<1x40x4096xf32>) -> tensor<1x40x1xf32> + %741 = "tosa.const"() <{value = dense<4.096000e+03> : tensor<1xf32>}> : () -> tensor<1xf32> + %742 = tosa.reciprocal %741 : (tensor<1xf32>) -> tensor<1xf32> + %743 = tosa.mul %742, %740 {shift = 0 : i8} : (tensor<1xf32>, tensor<1x40x1xf32>) -> tensor<1x40x1xf32> + %744 = "tosa.const"() <{value = dense<9.99999974E-6> : tensor<1x40x1xf32>}> : () -> tensor<1x40x1xf32> + %745 = tosa.add %743, %744 : (tensor<1x40x1xf32>, tensor<1x40x1xf32>) -> tensor<1x40x1xf32> + %746 = tosa.rsqrt %745 : (tensor<1x40x1xf32>) -> tensor<1x40x1xf32> + %747 = tosa.mul %737, %746 {shift = 0 : i8} : (tensor<1x40x4096xf32>, tensor<1x40x1xf32>) -> tensor<1x40x4096xf32> + %748 = tosa.reshape %arg57 {new_shape = array} : (tensor<4096xf32>) -> tensor<1x1x4096xf32> + %749 = tosa.mul %748, %747 {shift = 0 : i8} : (tensor<1x1x4096xf32>, tensor<1x40x4096xf32>) -> tensor<1x40x4096xf32> + %750 = "tosa.const"() <{value = dense<[1, 0]> : tensor<2xi32>}> : () -> tensor<2xi32> + %751 = tosa.transpose %arg58, %750 : (tensor<4096x4096xf32>, tensor<2xi32>) -> tensor<4096x4096xf32> + %752 = tosa.reshape %749 {new_shape = array} : (tensor<1x40x4096xf32>) -> tensor<40x4096xf32> + %cst_167 = arith.constant 
dense<0.000000e+00> : tensor<40x4096xf32> + %753 = linalg.matmul {cast = #linalg.type_fn} ins(%752, %751 : tensor<40x4096xf32>, tensor<4096x4096xf32>) outs(%cst_167 : tensor<40x4096xf32>) -> tensor<40x4096xf32> + %754 = tosa.reshape %753 {new_shape = array} : (tensor<40x4096xf32>) -> tensor<1x40x4096xf32> + %755 = "tosa.const"() <{value = dense<[1, 0]> : tensor<2xi32>}> : () -> tensor<2xi32> + %756 = tosa.transpose %arg59, %755 : (tensor<4096x4096xf32>, tensor<2xi32>) -> tensor<4096x4096xf32> + %757 = tosa.reshape %749 {new_shape = array} : (tensor<1x40x4096xf32>) -> tensor<40x4096xf32> + %cst_168 = arith.constant dense<0.000000e+00> : tensor<40x4096xf32> + %758 = linalg.matmul {cast = #linalg.type_fn} ins(%757, %756 : tensor<40x4096xf32>, tensor<4096x4096xf32>) outs(%cst_168 : tensor<40x4096xf32>) -> tensor<40x4096xf32> + %759 = tosa.reshape %758 {new_shape = array} : (tensor<40x4096xf32>) -> tensor<1x40x4096xf32> + %760 = "tosa.const"() <{value = dense<[1, 0]> : tensor<2xi32>}> : () -> tensor<2xi32> + %761 = tosa.transpose %arg60, %760 : (tensor<4096x4096xf32>, tensor<2xi32>) -> tensor<4096x4096xf32> + %762 = tosa.reshape %749 {new_shape = array} : (tensor<1x40x4096xf32>) -> tensor<40x4096xf32> + %cst_169 = arith.constant dense<0.000000e+00> : tensor<40x4096xf32> + %763 = linalg.matmul {cast = #linalg.type_fn} ins(%762, %761 : tensor<40x4096xf32>, tensor<4096x4096xf32>) outs(%cst_169 : tensor<40x4096xf32>) -> tensor<40x4096xf32> + %764 = tosa.reshape %763 {new_shape = array} : (tensor<40x4096xf32>) -> tensor<1x40x4096xf32> + %765 = tosa.reshape %754 {new_shape = array} : (tensor<1x40x4096xf32>) -> tensor<1x40x32x128xf32> + %766 = "tosa.const"() <{value = dense<[0, 2, 1, 3]> : tensor<4xi32>}> : () -> tensor<4xi32> + %767 = tosa.transpose %765, %766 : (tensor<1x40x32x128xf32>, tensor<4xi32>) -> tensor<1x32x40x128xf32> + %768 = tosa.reshape %759 {new_shape = array} : (tensor<1x40x4096xf32>) -> tensor<1x40x32x128xf32> + %769 = "tosa.const"() <{value = dense<[0, 2, 1, 
3]> : tensor<4xi32>}> : () -> tensor<4xi32> + %770 = tosa.transpose %768, %769 : (tensor<1x40x32x128xf32>, tensor<4xi32>) -> tensor<1x32x40x128xf32> + %771 = tosa.reshape %764 {new_shape = array} : (tensor<1x40x4096xf32>) -> tensor<1x40x32x128xf32> + %772 = "tosa.const"() <{value = dense<[0, 2, 1, 3]> : tensor<4xi32>}> : () -> tensor<4xi32> + %773 = tosa.transpose %771, %772 : (tensor<1x40x32x128xf32>, tensor<4xi32>) -> tensor<1x32x40x128xf32> + %774 = tosa.reshape %45 {new_shape = array} : (tensor<1x40x128xf32>) -> tensor<1x1x40x128xf32> + %775 = tosa.reshape %47 {new_shape = array} : (tensor<1x40x128xf32>) -> tensor<1x1x40x128xf32> + %776 = tosa.mul %767, %774 {shift = 0 : i8} : (tensor<1x32x40x128xf32>, tensor<1x1x40x128xf32>) -> tensor<1x32x40x128xf32> + %extracted_slice_170 = tensor.extract_slice %767[0, 0, 0, 0] [1, 32, 40, 64] [1, 1, 1, 1] : tensor<1x32x40x128xf32> to tensor<1x32x40x64xf32> + %extracted_slice_171 = tensor.extract_slice %767[0, 0, 0, 64] [1, 32, 40, 64] [1, 1, 1, 1] : tensor<1x32x40x128xf32> to tensor<1x32x40x64xf32> + %777 = tensor.empty() : tensor<1x32x40x64xf32> + %778 = linalg.negf ins(%extracted_slice_171 : tensor<1x32x40x64xf32>) outs(%777 : tensor<1x32x40x64xf32>) -> tensor<1x32x40x64xf32> + %779 = tensor.empty() : tensor<1x32x40x128xf32> + %inserted_slice_172 = tensor.insert_slice %778 into %779[0, 0, 0, 0] [1, 32, 40, 64] [1, 1, 1, 1] : tensor<1x32x40x64xf32> into tensor<1x32x40x128xf32> + %inserted_slice_173 = tensor.insert_slice %extracted_slice_170 into %inserted_slice_172[0, 0, 0, 64] [1, 32, 40, 64] [1, 1, 1, 1] : tensor<1x32x40x64xf32> into tensor<1x32x40x128xf32> + %780 = tosa.mul %inserted_slice_173, %775 {shift = 0 : i8} : (tensor<1x32x40x128xf32>, tensor<1x1x40x128xf32>) -> tensor<1x32x40x128xf32> + %781 = tosa.add %776, %780 : (tensor<1x32x40x128xf32>, tensor<1x32x40x128xf32>) -> tensor<1x32x40x128xf32> + %782 = tosa.mul %770, %774 {shift = 0 : i8} : (tensor<1x32x40x128xf32>, tensor<1x1x40x128xf32>) -> 
tensor<1x32x40x128xf32> + %extracted_slice_174 = tensor.extract_slice %770[0, 0, 0, 0] [1, 32, 40, 64] [1, 1, 1, 1] : tensor<1x32x40x128xf32> to tensor<1x32x40x64xf32> + %extracted_slice_175 = tensor.extract_slice %770[0, 0, 0, 64] [1, 32, 40, 64] [1, 1, 1, 1] : tensor<1x32x40x128xf32> to tensor<1x32x40x64xf32> + %783 = tensor.empty() : tensor<1x32x40x64xf32> + %784 = linalg.negf ins(%extracted_slice_175 : tensor<1x32x40x64xf32>) outs(%783 : tensor<1x32x40x64xf32>) -> tensor<1x32x40x64xf32> + %785 = tensor.empty() : tensor<1x32x40x128xf32> + %inserted_slice_176 = tensor.insert_slice %784 into %785[0, 0, 0, 0] [1, 32, 40, 64] [1, 1, 1, 1] : tensor<1x32x40x64xf32> into tensor<1x32x40x128xf32> + %inserted_slice_177 = tensor.insert_slice %extracted_slice_174 into %inserted_slice_176[0, 0, 0, 64] [1, 32, 40, 64] [1, 1, 1, 1] : tensor<1x32x40x64xf32> into tensor<1x32x40x128xf32> + %786 = tosa.mul %inserted_slice_177, %775 {shift = 0 : i8} : (tensor<1x32x40x128xf32>, tensor<1x1x40x128xf32>) -> tensor<1x32x40x128xf32> + %787 = tosa.add %782, %786 : (tensor<1x32x40x128xf32>, tensor<1x32x40x128xf32>) -> tensor<1x32x40x128xf32> + %788 = tosa.reshape %19 {new_shape = array} : (tensor<40x41xf32>) -> tensor<1x40x41xf32> + %789 = tosa.reshape %788 {new_shape = array} : (tensor<1x40x41xf32>) -> tensor<1x1x40x41xf32> + %extracted_slice_178 = tensor.extract_slice %789[0, 0, 0, 0] [1, 1, 40, 41] [1, 1, 1, 1] : tensor<1x1x40x41xf32> to tensor<1x1x40x41xf32> + %extracted_slice_179 = tensor.extract_slice %extracted_slice_178[0, 0, 0, 0] [1, 1, 40, 41] [1, 1, 1, 1] : tensor<1x1x40x41xf32> to tensor<1x1x40x41xf32> + %790 = "tosa.const"() <{value = dense<0.000000e+00> : tensor<1x1x40x41xf32>}> : () -> tensor<1x1x40x41xf32> + %791 = tosa.add %extracted_slice_179, %790 : (tensor<1x1x40x41xf32>, tensor<1x1x40x41xf32>) -> tensor<1x1x40x41xf32> + %extracted_slice_180 = tensor.extract_slice %791[0, 0, 0, 0] [1, 1, 40, 41] [1, 1, 1, 1] : tensor<1x1x40x41xf32> to tensor<1x1x40x41xf32> + 
%extracted_slice_181 = tensor.extract_slice %extracted_slice_180[0, 0, 0, 0] [1, 1, 40, 41] [1, 1, 1, 1] : tensor<1x1x40x41xf32> to tensor<1x1x40x41xf32> + %extracted_slice_182 = tensor.extract_slice %extracted_slice_181[0, 0, 0, 0] [1, 1, 40, 41] [1, 1, 1, 1] : tensor<1x1x40x41xf32> to tensor<1x1x40x41xf32> + %extracted_slice_183 = tensor.extract_slice %extracted_slice_182[0, 0, 0, 0] [1, 1, 40, 40] [1, 1, 1, 1] : tensor<1x1x40x41xf32> to tensor<1x1x40x40xf32> + %cst_184 = arith.constant 0.000000e+00 : f32 + %splat_185 = tensor.splat %cst_184 : tensor<40x40xf32> + %792 = tosa.reshape %extracted_slice_183 {new_shape = array} : (tensor<1x1x40x40xf32>) -> tensor<40x40xf32> + %793 = tosa.add %splat_185, %792 : (tensor<40x40xf32>, tensor<40x40xf32>) -> tensor<40x40xf32> + %794 = "tosa.const"() <{value = dense<[0, 1, 3, 2]> : tensor<4xi32>}> : () -> tensor<4xi32> + %795 = tosa.transpose %787, %794 : (tensor<1x32x40x128xf32>, tensor<4xi32>) -> tensor<1x32x128x40xf32> + %796 = tosa.reshape %781 {new_shape = array} : (tensor<1x32x40x128xf32>) -> tensor<32x40x128xf32> + %797 = tosa.reshape %795 {new_shape = array} : (tensor<1x32x128x40xf32>) -> tensor<32x128x40xf32> + %798 = tosa.matmul %796, %797 : (tensor<32x40x128xf32>, tensor<32x128x40xf32>) -> tensor<32x40x40xf32> + %cst_186 = arith.constant 0.0883883461 : f32 + %splat_187 = tensor.splat %cst_186 : tensor<32x40x40xf32> + %799 = tosa.mul %798, %splat_187 {shift = 0 : i8} : (tensor<32x40x40xf32>, tensor<32x40x40xf32>) -> tensor<32x40x40xf32> + %800 = tosa.add %799, %793 : (tensor<32x40x40xf32>, tensor<40x40xf32>) -> tensor<32x40x40xf32> + %801 = tosa.reduce_max %800 {axis = 2 : i32} : (tensor<32x40x40xf32>) -> tensor<32x40x1xf32> + %802 = tosa.sub %800, %801 : (tensor<32x40x40xf32>, tensor<32x40x1xf32>) -> tensor<32x40x40xf32> + %803 = math.exp %802 : tensor<32x40x40xf32> + %804 = tosa.reduce_sum %803 {axis = 2 : i32} : (tensor<32x40x40xf32>) -> tensor<32x40x1xf32> + %805 = tosa.log %804 : (tensor<32x40x1xf32>) -> 
tensor<32x40x1xf32> + %806 = tosa.add %801, %805 : (tensor<32x40x1xf32>, tensor<32x40x1xf32>) -> tensor<32x40x1xf32> + %807 = tosa.sub %800, %806 : (tensor<32x40x40xf32>, tensor<32x40x1xf32>) -> tensor<32x40x40xf32> + %808 = math.exp %807 : tensor<32x40x40xf32> + %809 = tosa.reshape %806 {new_shape = array} : (tensor<32x40x1xf32>) -> tensor<1x32x40xf32> + %810 = tosa.reshape %773 {new_shape = array} : (tensor<1x32x40x128xf32>) -> tensor<32x40x128xf32> + %811 = tosa.matmul %808, %810 : (tensor<32x40x40xf32>, tensor<32x40x128xf32>) -> tensor<32x40x128xf32> + %812 = tosa.reshape %811 {new_shape = array} : (tensor<32x40x128xf32>) -> tensor<1x32x40x128xf32> + %813 = "tosa.const"() <{value = dense<[0, 2, 1, 3]> : tensor<4xi32>}> : () -> tensor<4xi32> + %814 = tosa.transpose %812, %813 : (tensor<1x32x40x128xf32>, tensor<4xi32>) -> tensor<1x40x32x128xf32> + %815 = tosa.reshape %814 {new_shape = array} : (tensor<1x40x32x128xf32>) -> tensor<1x40x4096xf32> + %816 = "tosa.const"() <{value = dense<[1, 0]> : tensor<2xi32>}> : () -> tensor<2xi32> + %817 = tosa.transpose %arg61, %816 : (tensor<4096x4096xf32>, tensor<2xi32>) -> tensor<4096x4096xf32> + %818 = tosa.reshape %815 {new_shape = array} : (tensor<1x40x4096xf32>) -> tensor<40x4096xf32> + %cst_188 = arith.constant dense<0.000000e+00> : tensor<40x4096xf32> + %819 = linalg.matmul {cast = #linalg.type_fn} ins(%818, %817 : tensor<40x4096xf32>, tensor<4096x4096xf32>) outs(%cst_188 : tensor<40x4096xf32>) -> tensor<40x4096xf32> + %820 = tosa.reshape %819 {new_shape = array} : (tensor<40x4096xf32>) -> tensor<1x40x4096xf32> + %821 = tosa.add %737, %820 : (tensor<1x40x4096xf32>, tensor<1x40x4096xf32>) -> tensor<1x40x4096xf32> + %822 = tensor.empty() : tensor<1x40x4096xf32> + %c2_i32_189 = arith.constant 2 : i32 + %823 = linalg.generic {indexing_maps = [#map4, #map4], iterator_types = ["parallel", "parallel", "parallel"]} ins(%821 : tensor<1x40x4096xf32>) outs(%822 : tensor<1x40x4096xf32>) { + ^bb0(%in: f32, %out: f32): + %3745 = 
math.fpowi %in, %c2_i32_189 : f32, i32 + linalg.yield %3745 : f32 + } -> tensor<1x40x4096xf32> + %824 = tosa.reduce_sum %823 {axis = 2 : i32} : (tensor<1x40x4096xf32>) -> tensor<1x40x1xf32> + %825 = "tosa.const"() <{value = dense<4.096000e+03> : tensor<1xf32>}> : () -> tensor<1xf32> + %826 = tosa.reciprocal %825 : (tensor<1xf32>) -> tensor<1xf32> + %827 = tosa.mul %826, %824 {shift = 0 : i8} : (tensor<1xf32>, tensor<1x40x1xf32>) -> tensor<1x40x1xf32> + %828 = "tosa.const"() <{value = dense<9.99999974E-6> : tensor<1x40x1xf32>}> : () -> tensor<1x40x1xf32> + %829 = tosa.add %827, %828 : (tensor<1x40x1xf32>, tensor<1x40x1xf32>) -> tensor<1x40x1xf32> + %830 = tosa.rsqrt %829 : (tensor<1x40x1xf32>) -> tensor<1x40x1xf32> + %831 = tosa.mul %821, %830 {shift = 0 : i8} : (tensor<1x40x4096xf32>, tensor<1x40x1xf32>) -> tensor<1x40x4096xf32> + %832 = tosa.reshape %arg62 {new_shape = array} : (tensor<4096xf32>) -> tensor<1x1x4096xf32> + %833 = tosa.mul %832, %831 {shift = 0 : i8} : (tensor<1x1x4096xf32>, tensor<1x40x4096xf32>) -> tensor<1x40x4096xf32> + %834 = "tosa.const"() <{value = dense<[1, 0]> : tensor<2xi32>}> : () -> tensor<2xi32> + %835 = tosa.transpose %arg63, %834 : (tensor<11008x4096xf32>, tensor<2xi32>) -> tensor<4096x11008xf32> + %836 = tosa.reshape %833 {new_shape = array} : (tensor<1x40x4096xf32>) -> tensor<40x4096xf32> + %cst_190 = arith.constant dense<0.000000e+00> : tensor<40x11008xf32> + %837 = linalg.matmul {cast = #linalg.type_fn} ins(%836, %835 : tensor<40x4096xf32>, tensor<4096x11008xf32>) outs(%cst_190 : tensor<40x11008xf32>) -> tensor<40x11008xf32> + %838 = tosa.reshape %837 {new_shape = array} : (tensor<40x11008xf32>) -> tensor<1x40x11008xf32> + %839 = tosa.sigmoid %838 : (tensor<1x40x11008xf32>) -> tensor<1x40x11008xf32> + %840 = tosa.mul %838, %839 {shift = 0 : i8} : (tensor<1x40x11008xf32>, tensor<1x40x11008xf32>) -> tensor<1x40x11008xf32> + %841 = "tosa.const"() <{value = dense<[1, 0]> : tensor<2xi32>}> : () -> tensor<2xi32> + %842 = tosa.transpose 
%arg64, %841 : (tensor<11008x4096xf32>, tensor<2xi32>) -> tensor<4096x11008xf32> + %843 = tosa.reshape %833 {new_shape = array} : (tensor<1x40x4096xf32>) -> tensor<40x4096xf32> + %cst_191 = arith.constant dense<0.000000e+00> : tensor<40x11008xf32> + %844 = linalg.matmul {cast = #linalg.type_fn} ins(%843, %842 : tensor<40x4096xf32>, tensor<4096x11008xf32>) outs(%cst_191 : tensor<40x11008xf32>) -> tensor<40x11008xf32> + %845 = tosa.reshape %844 {new_shape = array} : (tensor<40x11008xf32>) -> tensor<1x40x11008xf32> + %846 = tosa.mul %840, %845 {shift = 0 : i8} : (tensor<1x40x11008xf32>, tensor<1x40x11008xf32>) -> tensor<1x40x11008xf32> + %847 = "tosa.const"() <{value = dense<[1, 0]> : tensor<2xi32>}> : () -> tensor<2xi32> + %848 = tosa.transpose %arg65, %847 : (tensor<4096x11008xf32>, tensor<2xi32>) -> tensor<11008x4096xf32> + %849 = tosa.reshape %846 {new_shape = array} : (tensor<1x40x11008xf32>) -> tensor<40x11008xf32> + %cst_192 = arith.constant dense<0.000000e+00> : tensor<40x4096xf32> + %850 = linalg.matmul {cast = #linalg.type_fn} ins(%849, %848 : tensor<40x11008xf32>, tensor<11008x4096xf32>) outs(%cst_192 : tensor<40x4096xf32>) -> tensor<40x4096xf32> + %851 = tosa.reshape %850 {new_shape = array} : (tensor<40x4096xf32>) -> tensor<1x40x4096xf32> + %852 = tosa.add %821, %851 : (tensor<1x40x4096xf32>, tensor<1x40x4096xf32>) -> tensor<1x40x4096xf32> + %853 = tensor.empty() : tensor<1x40x4096xf32> + %c2_i32_193 = arith.constant 2 : i32 + %854 = linalg.generic {indexing_maps = [#map4, #map4], iterator_types = ["parallel", "parallel", "parallel"]} ins(%852 : tensor<1x40x4096xf32>) outs(%853 : tensor<1x40x4096xf32>) { + ^bb0(%in: f32, %out: f32): + %3745 = math.fpowi %in, %c2_i32_193 : f32, i32 + linalg.yield %3745 : f32 + } -> tensor<1x40x4096xf32> + %855 = tosa.reduce_sum %854 {axis = 2 : i32} : (tensor<1x40x4096xf32>) -> tensor<1x40x1xf32> + %856 = "tosa.const"() <{value = dense<4.096000e+03> : tensor<1xf32>}> : () -> tensor<1xf32> + %857 = tosa.reciprocal %856 : 
(tensor<1xf32>) -> tensor<1xf32> + %858 = tosa.mul %857, %855 {shift = 0 : i8} : (tensor<1xf32>, tensor<1x40x1xf32>) -> tensor<1x40x1xf32> + %859 = "tosa.const"() <{value = dense<9.99999974E-6> : tensor<1x40x1xf32>}> : () -> tensor<1x40x1xf32> + %860 = tosa.add %858, %859 : (tensor<1x40x1xf32>, tensor<1x40x1xf32>) -> tensor<1x40x1xf32> + %861 = tosa.rsqrt %860 : (tensor<1x40x1xf32>) -> tensor<1x40x1xf32> + %862 = tosa.mul %852, %861 {shift = 0 : i8} : (tensor<1x40x4096xf32>, tensor<1x40x1xf32>) -> tensor<1x40x4096xf32> + %863 = tosa.reshape %arg66 {new_shape = array} : (tensor<4096xf32>) -> tensor<1x1x4096xf32> + %864 = tosa.mul %863, %862 {shift = 0 : i8} : (tensor<1x1x4096xf32>, tensor<1x40x4096xf32>) -> tensor<1x40x4096xf32> + %865 = "tosa.const"() <{value = dense<[1, 0]> : tensor<2xi32>}> : () -> tensor<2xi32> + %866 = tosa.transpose %arg67, %865 : (tensor<4096x4096xf32>, tensor<2xi32>) -> tensor<4096x4096xf32> + %867 = tosa.reshape %864 {new_shape = array} : (tensor<1x40x4096xf32>) -> tensor<40x4096xf32> + %cst_194 = arith.constant dense<0.000000e+00> : tensor<40x4096xf32> + %868 = linalg.matmul {cast = #linalg.type_fn} ins(%867, %866 : tensor<40x4096xf32>, tensor<4096x4096xf32>) outs(%cst_194 : tensor<40x4096xf32>) -> tensor<40x4096xf32> + %869 = tosa.reshape %868 {new_shape = array} : (tensor<40x4096xf32>) -> tensor<1x40x4096xf32> + %870 = "tosa.const"() <{value = dense<[1, 0]> : tensor<2xi32>}> : () -> tensor<2xi32> + %871 = tosa.transpose %arg68, %870 : (tensor<4096x4096xf32>, tensor<2xi32>) -> tensor<4096x4096xf32> + %872 = tosa.reshape %864 {new_shape = array} : (tensor<1x40x4096xf32>) -> tensor<40x4096xf32> + %cst_195 = arith.constant dense<0.000000e+00> : tensor<40x4096xf32> + %873 = linalg.matmul {cast = #linalg.type_fn} ins(%872, %871 : tensor<40x4096xf32>, tensor<4096x4096xf32>) outs(%cst_195 : tensor<40x4096xf32>) -> tensor<40x4096xf32> + %874 = tosa.reshape %873 {new_shape = array} : (tensor<40x4096xf32>) -> tensor<1x40x4096xf32> + %875 = 
"tosa.const"() <{value = dense<[1, 0]> : tensor<2xi32>}> : () -> tensor<2xi32> + %876 = tosa.transpose %arg69, %875 : (tensor<4096x4096xf32>, tensor<2xi32>) -> tensor<4096x4096xf32> + %877 = tosa.reshape %864 {new_shape = array} : (tensor<1x40x4096xf32>) -> tensor<40x4096xf32> + %cst_196 = arith.constant dense<0.000000e+00> : tensor<40x4096xf32> + %878 = linalg.matmul {cast = #linalg.type_fn} ins(%877, %876 : tensor<40x4096xf32>, tensor<4096x4096xf32>) outs(%cst_196 : tensor<40x4096xf32>) -> tensor<40x4096xf32> + %879 = tosa.reshape %878 {new_shape = array} : (tensor<40x4096xf32>) -> tensor<1x40x4096xf32> + %880 = tosa.reshape %869 {new_shape = array} : (tensor<1x40x4096xf32>) -> tensor<1x40x32x128xf32> + %881 = "tosa.const"() <{value = dense<[0, 2, 1, 3]> : tensor<4xi32>}> : () -> tensor<4xi32> + %882 = tosa.transpose %880, %881 : (tensor<1x40x32x128xf32>, tensor<4xi32>) -> tensor<1x32x40x128xf32> + %883 = tosa.reshape %874 {new_shape = array} : (tensor<1x40x4096xf32>) -> tensor<1x40x32x128xf32> + %884 = "tosa.const"() <{value = dense<[0, 2, 1, 3]> : tensor<4xi32>}> : () -> tensor<4xi32> + %885 = tosa.transpose %883, %884 : (tensor<1x40x32x128xf32>, tensor<4xi32>) -> tensor<1x32x40x128xf32> + %886 = tosa.reshape %879 {new_shape = array} : (tensor<1x40x4096xf32>) -> tensor<1x40x32x128xf32> + %887 = "tosa.const"() <{value = dense<[0, 2, 1, 3]> : tensor<4xi32>}> : () -> tensor<4xi32> + %888 = tosa.transpose %886, %887 : (tensor<1x40x32x128xf32>, tensor<4xi32>) -> tensor<1x32x40x128xf32> + %889 = tosa.reshape %45 {new_shape = array} : (tensor<1x40x128xf32>) -> tensor<1x1x40x128xf32> + %890 = tosa.reshape %47 {new_shape = array} : (tensor<1x40x128xf32>) -> tensor<1x1x40x128xf32> + %891 = tosa.mul %882, %889 {shift = 0 : i8} : (tensor<1x32x40x128xf32>, tensor<1x1x40x128xf32>) -> tensor<1x32x40x128xf32> + %extracted_slice_197 = tensor.extract_slice %882[0, 0, 0, 0] [1, 32, 40, 64] [1, 1, 1, 1] : tensor<1x32x40x128xf32> to tensor<1x32x40x64xf32> + %extracted_slice_198 = 
tensor.extract_slice %882[0, 0, 0, 64] [1, 32, 40, 64] [1, 1, 1, 1] : tensor<1x32x40x128xf32> to tensor<1x32x40x64xf32> + %892 = tensor.empty() : tensor<1x32x40x64xf32> + %893 = linalg.negf ins(%extracted_slice_198 : tensor<1x32x40x64xf32>) outs(%892 : tensor<1x32x40x64xf32>) -> tensor<1x32x40x64xf32> + %894 = tensor.empty() : tensor<1x32x40x128xf32> + %inserted_slice_199 = tensor.insert_slice %893 into %894[0, 0, 0, 0] [1, 32, 40, 64] [1, 1, 1, 1] : tensor<1x32x40x64xf32> into tensor<1x32x40x128xf32> + %inserted_slice_200 = tensor.insert_slice %extracted_slice_197 into %inserted_slice_199[0, 0, 0, 64] [1, 32, 40, 64] [1, 1, 1, 1] : tensor<1x32x40x64xf32> into tensor<1x32x40x128xf32> + %895 = tosa.mul %inserted_slice_200, %890 {shift = 0 : i8} : (tensor<1x32x40x128xf32>, tensor<1x1x40x128xf32>) -> tensor<1x32x40x128xf32> + %896 = tosa.add %891, %895 : (tensor<1x32x40x128xf32>, tensor<1x32x40x128xf32>) -> tensor<1x32x40x128xf32> + %897 = tosa.mul %885, %889 {shift = 0 : i8} : (tensor<1x32x40x128xf32>, tensor<1x1x40x128xf32>) -> tensor<1x32x40x128xf32> + %extracted_slice_201 = tensor.extract_slice %885[0, 0, 0, 0] [1, 32, 40, 64] [1, 1, 1, 1] : tensor<1x32x40x128xf32> to tensor<1x32x40x64xf32> + %extracted_slice_202 = tensor.extract_slice %885[0, 0, 0, 64] [1, 32, 40, 64] [1, 1, 1, 1] : tensor<1x32x40x128xf32> to tensor<1x32x40x64xf32> + %898 = tensor.empty() : tensor<1x32x40x64xf32> + %899 = linalg.negf ins(%extracted_slice_202 : tensor<1x32x40x64xf32>) outs(%898 : tensor<1x32x40x64xf32>) -> tensor<1x32x40x64xf32> + %900 = tensor.empty() : tensor<1x32x40x128xf32> + %inserted_slice_203 = tensor.insert_slice %899 into %900[0, 0, 0, 0] [1, 32, 40, 64] [1, 1, 1, 1] : tensor<1x32x40x64xf32> into tensor<1x32x40x128xf32> + %inserted_slice_204 = tensor.insert_slice %extracted_slice_201 into %inserted_slice_203[0, 0, 0, 64] [1, 32, 40, 64] [1, 1, 1, 1] : tensor<1x32x40x64xf32> into tensor<1x32x40x128xf32> + %901 = tosa.mul %inserted_slice_204, %890 {shift = 0 : i8} : 
(tensor<1x32x40x128xf32>, tensor<1x1x40x128xf32>) -> tensor<1x32x40x128xf32> + %902 = tosa.add %897, %901 : (tensor<1x32x40x128xf32>, tensor<1x32x40x128xf32>) -> tensor<1x32x40x128xf32> + %903 = tosa.reshape %19 {new_shape = array} : (tensor<40x41xf32>) -> tensor<1x40x41xf32> + %904 = tosa.reshape %903 {new_shape = array} : (tensor<1x40x41xf32>) -> tensor<1x1x40x41xf32> + %extracted_slice_205 = tensor.extract_slice %904[0, 0, 0, 0] [1, 1, 40, 41] [1, 1, 1, 1] : tensor<1x1x40x41xf32> to tensor<1x1x40x41xf32> + %extracted_slice_206 = tensor.extract_slice %extracted_slice_205[0, 0, 0, 0] [1, 1, 40, 41] [1, 1, 1, 1] : tensor<1x1x40x41xf32> to tensor<1x1x40x41xf32> + %905 = "tosa.const"() <{value = dense<0.000000e+00> : tensor<1x1x40x41xf32>}> : () -> tensor<1x1x40x41xf32> + %906 = tosa.add %extracted_slice_206, %905 : (tensor<1x1x40x41xf32>, tensor<1x1x40x41xf32>) -> tensor<1x1x40x41xf32> + %extracted_slice_207 = tensor.extract_slice %906[0, 0, 0, 0] [1, 1, 40, 41] [1, 1, 1, 1] : tensor<1x1x40x41xf32> to tensor<1x1x40x41xf32> + %extracted_slice_208 = tensor.extract_slice %extracted_slice_207[0, 0, 0, 0] [1, 1, 40, 41] [1, 1, 1, 1] : tensor<1x1x40x41xf32> to tensor<1x1x40x41xf32> + %extracted_slice_209 = tensor.extract_slice %extracted_slice_208[0, 0, 0, 0] [1, 1, 40, 41] [1, 1, 1, 1] : tensor<1x1x40x41xf32> to tensor<1x1x40x41xf32> + %extracted_slice_210 = tensor.extract_slice %extracted_slice_209[0, 0, 0, 0] [1, 1, 40, 40] [1, 1, 1, 1] : tensor<1x1x40x41xf32> to tensor<1x1x40x40xf32> + %cst_211 = arith.constant 0.000000e+00 : f32 + %splat_212 = tensor.splat %cst_211 : tensor<40x40xf32> + %907 = tosa.reshape %extracted_slice_210 {new_shape = array} : (tensor<1x1x40x40xf32>) -> tensor<40x40xf32> + %908 = tosa.add %splat_212, %907 : (tensor<40x40xf32>, tensor<40x40xf32>) -> tensor<40x40xf32> + %909 = "tosa.const"() <{value = dense<[0, 1, 3, 2]> : tensor<4xi32>}> : () -> tensor<4xi32> + %910 = tosa.transpose %902, %909 : (tensor<1x32x40x128xf32>, tensor<4xi32>) -> 
tensor<1x32x128x40xf32> + %911 = tosa.reshape %896 {new_shape = array} : (tensor<1x32x40x128xf32>) -> tensor<32x40x128xf32> + %912 = tosa.reshape %910 {new_shape = array} : (tensor<1x32x128x40xf32>) -> tensor<32x128x40xf32> + %913 = tosa.matmul %911, %912 : (tensor<32x40x128xf32>, tensor<32x128x40xf32>) -> tensor<32x40x40xf32> + %cst_213 = arith.constant 0.0883883461 : f32 + %splat_214 = tensor.splat %cst_213 : tensor<32x40x40xf32> + %914 = tosa.mul %913, %splat_214 {shift = 0 : i8} : (tensor<32x40x40xf32>, tensor<32x40x40xf32>) -> tensor<32x40x40xf32> + %915 = tosa.add %914, %908 : (tensor<32x40x40xf32>, tensor<40x40xf32>) -> tensor<32x40x40xf32> + %916 = tosa.reduce_max %915 {axis = 2 : i32} : (tensor<32x40x40xf32>) -> tensor<32x40x1xf32> + %917 = tosa.sub %915, %916 : (tensor<32x40x40xf32>, tensor<32x40x1xf32>) -> tensor<32x40x40xf32> + %918 = math.exp %917 : tensor<32x40x40xf32> + %919 = tosa.reduce_sum %918 {axis = 2 : i32} : (tensor<32x40x40xf32>) -> tensor<32x40x1xf32> + %920 = tosa.log %919 : (tensor<32x40x1xf32>) -> tensor<32x40x1xf32> + %921 = tosa.add %916, %920 : (tensor<32x40x1xf32>, tensor<32x40x1xf32>) -> tensor<32x40x1xf32> + %922 = tosa.sub %915, %921 : (tensor<32x40x40xf32>, tensor<32x40x1xf32>) -> tensor<32x40x40xf32> + %923 = math.exp %922 : tensor<32x40x40xf32> + %924 = tosa.reshape %921 {new_shape = array} : (tensor<32x40x1xf32>) -> tensor<1x32x40xf32> + %925 = tosa.reshape %888 {new_shape = array} : (tensor<1x32x40x128xf32>) -> tensor<32x40x128xf32> + %926 = tosa.matmul %923, %925 : (tensor<32x40x40xf32>, tensor<32x40x128xf32>) -> tensor<32x40x128xf32> + %927 = tosa.reshape %926 {new_shape = array} : (tensor<32x40x128xf32>) -> tensor<1x32x40x128xf32> + %928 = "tosa.const"() <{value = dense<[0, 2, 1, 3]> : tensor<4xi32>}> : () -> tensor<4xi32> + %929 = tosa.transpose %927, %928 : (tensor<1x32x40x128xf32>, tensor<4xi32>) -> tensor<1x40x32x128xf32> + %930 = tosa.reshape %929 {new_shape = array} : (tensor<1x40x32x128xf32>) -> 
tensor<1x40x4096xf32> + %931 = "tosa.const"() <{value = dense<[1, 0]> : tensor<2xi32>}> : () -> tensor<2xi32> + %932 = tosa.transpose %arg70, %931 : (tensor<4096x4096xf32>, tensor<2xi32>) -> tensor<4096x4096xf32> + %933 = tosa.reshape %930 {new_shape = array} : (tensor<1x40x4096xf32>) -> tensor<40x4096xf32> + %cst_215 = arith.constant dense<0.000000e+00> : tensor<40x4096xf32> + %934 = linalg.matmul {cast = #linalg.type_fn} ins(%933, %932 : tensor<40x4096xf32>, tensor<4096x4096xf32>) outs(%cst_215 : tensor<40x4096xf32>) -> tensor<40x4096xf32> + %935 = tosa.reshape %934 {new_shape = array} : (tensor<40x4096xf32>) -> tensor<1x40x4096xf32> + %936 = tosa.add %852, %935 : (tensor<1x40x4096xf32>, tensor<1x40x4096xf32>) -> tensor<1x40x4096xf32> + %937 = tensor.empty() : tensor<1x40x4096xf32> + %c2_i32_216 = arith.constant 2 : i32 + %938 = linalg.generic {indexing_maps = [#map4, #map4], iterator_types = ["parallel", "parallel", "parallel"]} ins(%936 : tensor<1x40x4096xf32>) outs(%937 : tensor<1x40x4096xf32>) { + ^bb0(%in: f32, %out: f32): + %3745 = math.fpowi %in, %c2_i32_216 : f32, i32 + linalg.yield %3745 : f32 + } -> tensor<1x40x4096xf32> + %939 = tosa.reduce_sum %938 {axis = 2 : i32} : (tensor<1x40x4096xf32>) -> tensor<1x40x1xf32> + %940 = "tosa.const"() <{value = dense<4.096000e+03> : tensor<1xf32>}> : () -> tensor<1xf32> + %941 = tosa.reciprocal %940 : (tensor<1xf32>) -> tensor<1xf32> + %942 = tosa.mul %941, %939 {shift = 0 : i8} : (tensor<1xf32>, tensor<1x40x1xf32>) -> tensor<1x40x1xf32> + %943 = "tosa.const"() <{value = dense<9.99999974E-6> : tensor<1x40x1xf32>}> : () -> tensor<1x40x1xf32> + %944 = tosa.add %942, %943 : (tensor<1x40x1xf32>, tensor<1x40x1xf32>) -> tensor<1x40x1xf32> + %945 = tosa.rsqrt %944 : (tensor<1x40x1xf32>) -> tensor<1x40x1xf32> + %946 = tosa.mul %936, %945 {shift = 0 : i8} : (tensor<1x40x4096xf32>, tensor<1x40x1xf32>) -> tensor<1x40x4096xf32> + %947 = tosa.reshape %arg71 {new_shape = array} : (tensor<4096xf32>) -> tensor<1x1x4096xf32> + %948 = 
tosa.mul %947, %946 {shift = 0 : i8} : (tensor<1x1x4096xf32>, tensor<1x40x4096xf32>) -> tensor<1x40x4096xf32> + %949 = "tosa.const"() <{value = dense<[1, 0]> : tensor<2xi32>}> : () -> tensor<2xi32> + %950 = tosa.transpose %arg72, %949 : (tensor<11008x4096xf32>, tensor<2xi32>) -> tensor<4096x11008xf32> + %951 = tosa.reshape %948 {new_shape = array} : (tensor<1x40x4096xf32>) -> tensor<40x4096xf32> + %cst_217 = arith.constant dense<0.000000e+00> : tensor<40x11008xf32> + %952 = linalg.matmul {cast = #linalg.type_fn} ins(%951, %950 : tensor<40x4096xf32>, tensor<4096x11008xf32>) outs(%cst_217 : tensor<40x11008xf32>) -> tensor<40x11008xf32> + %953 = tosa.reshape %952 {new_shape = array} : (tensor<40x11008xf32>) -> tensor<1x40x11008xf32> + %954 = tosa.sigmoid %953 : (tensor<1x40x11008xf32>) -> tensor<1x40x11008xf32> + %955 = tosa.mul %953, %954 {shift = 0 : i8} : (tensor<1x40x11008xf32>, tensor<1x40x11008xf32>) -> tensor<1x40x11008xf32> + %956 = "tosa.const"() <{value = dense<[1, 0]> : tensor<2xi32>}> : () -> tensor<2xi32> + %957 = tosa.transpose %arg73, %956 : (tensor<11008x4096xf32>, tensor<2xi32>) -> tensor<4096x11008xf32> + %958 = tosa.reshape %948 {new_shape = array} : (tensor<1x40x4096xf32>) -> tensor<40x4096xf32> + %cst_218 = arith.constant dense<0.000000e+00> : tensor<40x11008xf32> + %959 = linalg.matmul {cast = #linalg.type_fn} ins(%958, %957 : tensor<40x4096xf32>, tensor<4096x11008xf32>) outs(%cst_218 : tensor<40x11008xf32>) -> tensor<40x11008xf32> + %960 = tosa.reshape %959 {new_shape = array} : (tensor<40x11008xf32>) -> tensor<1x40x11008xf32> + %961 = tosa.mul %955, %960 {shift = 0 : i8} : (tensor<1x40x11008xf32>, tensor<1x40x11008xf32>) -> tensor<1x40x11008xf32> + %962 = "tosa.const"() <{value = dense<[1, 0]> : tensor<2xi32>}> : () -> tensor<2xi32> + %963 = tosa.transpose %arg74, %962 : (tensor<4096x11008xf32>, tensor<2xi32>) -> tensor<11008x4096xf32> + %964 = tosa.reshape %961 {new_shape = array} : (tensor<1x40x11008xf32>) -> tensor<40x11008xf32> + %cst_219 = 
arith.constant dense<0.000000e+00> : tensor<40x4096xf32> + %965 = linalg.matmul {cast = #linalg.type_fn} ins(%964, %963 : tensor<40x11008xf32>, tensor<11008x4096xf32>) outs(%cst_219 : tensor<40x4096xf32>) -> tensor<40x4096xf32> + %966 = tosa.reshape %965 {new_shape = array} : (tensor<40x4096xf32>) -> tensor<1x40x4096xf32> + %967 = tosa.add %936, %966 : (tensor<1x40x4096xf32>, tensor<1x40x4096xf32>) -> tensor<1x40x4096xf32> + %968 = tensor.empty() : tensor<1x40x4096xf32> + %c2_i32_220 = arith.constant 2 : i32 + %969 = linalg.generic {indexing_maps = [#map4, #map4], iterator_types = ["parallel", "parallel", "parallel"]} ins(%967 : tensor<1x40x4096xf32>) outs(%968 : tensor<1x40x4096xf32>) { + ^bb0(%in: f32, %out: f32): + %3745 = math.fpowi %in, %c2_i32_220 : f32, i32 + linalg.yield %3745 : f32 + } -> tensor<1x40x4096xf32> + %970 = tosa.reduce_sum %969 {axis = 2 : i32} : (tensor<1x40x4096xf32>) -> tensor<1x40x1xf32> + %971 = "tosa.const"() <{value = dense<4.096000e+03> : tensor<1xf32>}> : () -> tensor<1xf32> + %972 = tosa.reciprocal %971 : (tensor<1xf32>) -> tensor<1xf32> + %973 = tosa.mul %972, %970 {shift = 0 : i8} : (tensor<1xf32>, tensor<1x40x1xf32>) -> tensor<1x40x1xf32> + %974 = "tosa.const"() <{value = dense<9.99999974E-6> : tensor<1x40x1xf32>}> : () -> tensor<1x40x1xf32> + %975 = tosa.add %973, %974 : (tensor<1x40x1xf32>, tensor<1x40x1xf32>) -> tensor<1x40x1xf32> + %976 = tosa.rsqrt %975 : (tensor<1x40x1xf32>) -> tensor<1x40x1xf32> + %977 = tosa.mul %967, %976 {shift = 0 : i8} : (tensor<1x40x4096xf32>, tensor<1x40x1xf32>) -> tensor<1x40x4096xf32> + %978 = tosa.reshape %arg75 {new_shape = array} : (tensor<4096xf32>) -> tensor<1x1x4096xf32> + %979 = tosa.mul %978, %977 {shift = 0 : i8} : (tensor<1x1x4096xf32>, tensor<1x40x4096xf32>) -> tensor<1x40x4096xf32> + %980 = "tosa.const"() <{value = dense<[1, 0]> : tensor<2xi32>}> : () -> tensor<2xi32> + %981 = tosa.transpose %arg76, %980 : (tensor<4096x4096xf32>, tensor<2xi32>) -> tensor<4096x4096xf32> + %982 = 
tosa.reshape %979 {new_shape = array} : (tensor<1x40x4096xf32>) -> tensor<40x4096xf32> + %cst_221 = arith.constant dense<0.000000e+00> : tensor<40x4096xf32> + %983 = linalg.matmul {cast = #linalg.type_fn} ins(%982, %981 : tensor<40x4096xf32>, tensor<4096x4096xf32>) outs(%cst_221 : tensor<40x4096xf32>) -> tensor<40x4096xf32> + %984 = tosa.reshape %983 {new_shape = array} : (tensor<40x4096xf32>) -> tensor<1x40x4096xf32> + %985 = "tosa.const"() <{value = dense<[1, 0]> : tensor<2xi32>}> : () -> tensor<2xi32> + %986 = tosa.transpose %arg77, %985 : (tensor<4096x4096xf32>, tensor<2xi32>) -> tensor<4096x4096xf32> + %987 = tosa.reshape %979 {new_shape = array} : (tensor<1x40x4096xf32>) -> tensor<40x4096xf32> + %cst_222 = arith.constant dense<0.000000e+00> : tensor<40x4096xf32> + %988 = linalg.matmul {cast = #linalg.type_fn} ins(%987, %986 : tensor<40x4096xf32>, tensor<4096x4096xf32>) outs(%cst_222 : tensor<40x4096xf32>) -> tensor<40x4096xf32> + %989 = tosa.reshape %988 {new_shape = array} : (tensor<40x4096xf32>) -> tensor<1x40x4096xf32> + %990 = "tosa.const"() <{value = dense<[1, 0]> : tensor<2xi32>}> : () -> tensor<2xi32> + %991 = tosa.transpose %arg78, %990 : (tensor<4096x4096xf32>, tensor<2xi32>) -> tensor<4096x4096xf32> + %992 = tosa.reshape %979 {new_shape = array} : (tensor<1x40x4096xf32>) -> tensor<40x4096xf32> + %cst_223 = arith.constant dense<0.000000e+00> : tensor<40x4096xf32> + %993 = linalg.matmul {cast = #linalg.type_fn} ins(%992, %991 : tensor<40x4096xf32>, tensor<4096x4096xf32>) outs(%cst_223 : tensor<40x4096xf32>) -> tensor<40x4096xf32> + %994 = tosa.reshape %993 {new_shape = array} : (tensor<40x4096xf32>) -> tensor<1x40x4096xf32> + %995 = tosa.reshape %984 {new_shape = array} : (tensor<1x40x4096xf32>) -> tensor<1x40x32x128xf32> + %996 = "tosa.const"() <{value = dense<[0, 2, 1, 3]> : tensor<4xi32>}> : () -> tensor<4xi32> + %997 = tosa.transpose %995, %996 : (tensor<1x40x32x128xf32>, tensor<4xi32>) -> tensor<1x32x40x128xf32> + %998 = tosa.reshape %989 
{new_shape = array} : (tensor<1x40x4096xf32>) -> tensor<1x40x32x128xf32> + %999 = "tosa.const"() <{value = dense<[0, 2, 1, 3]> : tensor<4xi32>}> : () -> tensor<4xi32> + %1000 = tosa.transpose %998, %999 : (tensor<1x40x32x128xf32>, tensor<4xi32>) -> tensor<1x32x40x128xf32> + %1001 = tosa.reshape %994 {new_shape = array} : (tensor<1x40x4096xf32>) -> tensor<1x40x32x128xf32> + %1002 = "tosa.const"() <{value = dense<[0, 2, 1, 3]> : tensor<4xi32>}> : () -> tensor<4xi32> + %1003 = tosa.transpose %1001, %1002 : (tensor<1x40x32x128xf32>, tensor<4xi32>) -> tensor<1x32x40x128xf32> + %1004 = tosa.reshape %45 {new_shape = array} : (tensor<1x40x128xf32>) -> tensor<1x1x40x128xf32> + %1005 = tosa.reshape %47 {new_shape = array} : (tensor<1x40x128xf32>) -> tensor<1x1x40x128xf32> + %1006 = tosa.mul %997, %1004 {shift = 0 : i8} : (tensor<1x32x40x128xf32>, tensor<1x1x40x128xf32>) -> tensor<1x32x40x128xf32> + %extracted_slice_224 = tensor.extract_slice %997[0, 0, 0, 0] [1, 32, 40, 64] [1, 1, 1, 1] : tensor<1x32x40x128xf32> to tensor<1x32x40x64xf32> + %extracted_slice_225 = tensor.extract_slice %997[0, 0, 0, 64] [1, 32, 40, 64] [1, 1, 1, 1] : tensor<1x32x40x128xf32> to tensor<1x32x40x64xf32> + %1007 = tensor.empty() : tensor<1x32x40x64xf32> + %1008 = linalg.negf ins(%extracted_slice_225 : tensor<1x32x40x64xf32>) outs(%1007 : tensor<1x32x40x64xf32>) -> tensor<1x32x40x64xf32> + %1009 = tensor.empty() : tensor<1x32x40x128xf32> + %inserted_slice_226 = tensor.insert_slice %1008 into %1009[0, 0, 0, 0] [1, 32, 40, 64] [1, 1, 1, 1] : tensor<1x32x40x64xf32> into tensor<1x32x40x128xf32> + %inserted_slice_227 = tensor.insert_slice %extracted_slice_224 into %inserted_slice_226[0, 0, 0, 64] [1, 32, 40, 64] [1, 1, 1, 1] : tensor<1x32x40x64xf32> into tensor<1x32x40x128xf32> + %1010 = tosa.mul %inserted_slice_227, %1005 {shift = 0 : i8} : (tensor<1x32x40x128xf32>, tensor<1x1x40x128xf32>) -> tensor<1x32x40x128xf32> + %1011 = tosa.add %1006, %1010 : (tensor<1x32x40x128xf32>, tensor<1x32x40x128xf32>) -> 
tensor<1x32x40x128xf32> + %1012 = tosa.mul %1000, %1004 {shift = 0 : i8} : (tensor<1x32x40x128xf32>, tensor<1x1x40x128xf32>) -> tensor<1x32x40x128xf32> + %extracted_slice_228 = tensor.extract_slice %1000[0, 0, 0, 0] [1, 32, 40, 64] [1, 1, 1, 1] : tensor<1x32x40x128xf32> to tensor<1x32x40x64xf32> + %extracted_slice_229 = tensor.extract_slice %1000[0, 0, 0, 64] [1, 32, 40, 64] [1, 1, 1, 1] : tensor<1x32x40x128xf32> to tensor<1x32x40x64xf32> + %1013 = tensor.empty() : tensor<1x32x40x64xf32> + %1014 = linalg.negf ins(%extracted_slice_229 : tensor<1x32x40x64xf32>) outs(%1013 : tensor<1x32x40x64xf32>) -> tensor<1x32x40x64xf32> + %1015 = tensor.empty() : tensor<1x32x40x128xf32> + %inserted_slice_230 = tensor.insert_slice %1014 into %1015[0, 0, 0, 0] [1, 32, 40, 64] [1, 1, 1, 1] : tensor<1x32x40x64xf32> into tensor<1x32x40x128xf32> + %inserted_slice_231 = tensor.insert_slice %extracted_slice_228 into %inserted_slice_230[0, 0, 0, 64] [1, 32, 40, 64] [1, 1, 1, 1] : tensor<1x32x40x64xf32> into tensor<1x32x40x128xf32> + %1016 = tosa.mul %inserted_slice_231, %1005 {shift = 0 : i8} : (tensor<1x32x40x128xf32>, tensor<1x1x40x128xf32>) -> tensor<1x32x40x128xf32> + %1017 = tosa.add %1012, %1016 : (tensor<1x32x40x128xf32>, tensor<1x32x40x128xf32>) -> tensor<1x32x40x128xf32> + %1018 = tosa.reshape %19 {new_shape = array} : (tensor<40x41xf32>) -> tensor<1x40x41xf32> + %1019 = tosa.reshape %1018 {new_shape = array} : (tensor<1x40x41xf32>) -> tensor<1x1x40x41xf32> + %extracted_slice_232 = tensor.extract_slice %1019[0, 0, 0, 0] [1, 1, 40, 41] [1, 1, 1, 1] : tensor<1x1x40x41xf32> to tensor<1x1x40x41xf32> + %extracted_slice_233 = tensor.extract_slice %extracted_slice_232[0, 0, 0, 0] [1, 1, 40, 41] [1, 1, 1, 1] : tensor<1x1x40x41xf32> to tensor<1x1x40x41xf32> + %1020 = "tosa.const"() <{value = dense<0.000000e+00> : tensor<1x1x40x41xf32>}> : () -> tensor<1x1x40x41xf32> + %1021 = tosa.add %extracted_slice_233, %1020 : (tensor<1x1x40x41xf32>, tensor<1x1x40x41xf32>) -> tensor<1x1x40x41xf32> + 
%extracted_slice_234 = tensor.extract_slice %1021[0, 0, 0, 0] [1, 1, 40, 41] [1, 1, 1, 1] : tensor<1x1x40x41xf32> to tensor<1x1x40x41xf32> + %extracted_slice_235 = tensor.extract_slice %extracted_slice_234[0, 0, 0, 0] [1, 1, 40, 41] [1, 1, 1, 1] : tensor<1x1x40x41xf32> to tensor<1x1x40x41xf32> + %extracted_slice_236 = tensor.extract_slice %extracted_slice_235[0, 0, 0, 0] [1, 1, 40, 41] [1, 1, 1, 1] : tensor<1x1x40x41xf32> to tensor<1x1x40x41xf32> + %extracted_slice_237 = tensor.extract_slice %extracted_slice_236[0, 0, 0, 0] [1, 1, 40, 40] [1, 1, 1, 1] : tensor<1x1x40x41xf32> to tensor<1x1x40x40xf32> + %cst_238 = arith.constant 0.000000e+00 : f32 + %splat_239 = tensor.splat %cst_238 : tensor<40x40xf32> + %1022 = tosa.reshape %extracted_slice_237 {new_shape = array} : (tensor<1x1x40x40xf32>) -> tensor<40x40xf32> + %1023 = tosa.add %splat_239, %1022 : (tensor<40x40xf32>, tensor<40x40xf32>) -> tensor<40x40xf32> + %1024 = "tosa.const"() <{value = dense<[0, 1, 3, 2]> : tensor<4xi32>}> : () -> tensor<4xi32> + %1025 = tosa.transpose %1017, %1024 : (tensor<1x32x40x128xf32>, tensor<4xi32>) -> tensor<1x32x128x40xf32> + %1026 = tosa.reshape %1011 {new_shape = array} : (tensor<1x32x40x128xf32>) -> tensor<32x40x128xf32> + %1027 = tosa.reshape %1025 {new_shape = array} : (tensor<1x32x128x40xf32>) -> tensor<32x128x40xf32> + %1028 = tosa.matmul %1026, %1027 : (tensor<32x40x128xf32>, tensor<32x128x40xf32>) -> tensor<32x40x40xf32> + %cst_240 = arith.constant 0.0883883461 : f32 + %splat_241 = tensor.splat %cst_240 : tensor<32x40x40xf32> + %1029 = tosa.mul %1028, %splat_241 {shift = 0 : i8} : (tensor<32x40x40xf32>, tensor<32x40x40xf32>) -> tensor<32x40x40xf32> + %1030 = tosa.add %1029, %1023 : (tensor<32x40x40xf32>, tensor<40x40xf32>) -> tensor<32x40x40xf32> + %1031 = tosa.reduce_max %1030 {axis = 2 : i32} : (tensor<32x40x40xf32>) -> tensor<32x40x1xf32> + %1032 = tosa.sub %1030, %1031 : (tensor<32x40x40xf32>, tensor<32x40x1xf32>) -> tensor<32x40x40xf32> + %1033 = math.exp %1032 : 
tensor<32x40x40xf32> + %1034 = tosa.reduce_sum %1033 {axis = 2 : i32} : (tensor<32x40x40xf32>) -> tensor<32x40x1xf32> + %1035 = tosa.log %1034 : (tensor<32x40x1xf32>) -> tensor<32x40x1xf32> + %1036 = tosa.add %1031, %1035 : (tensor<32x40x1xf32>, tensor<32x40x1xf32>) -> tensor<32x40x1xf32> + %1037 = tosa.sub %1030, %1036 : (tensor<32x40x40xf32>, tensor<32x40x1xf32>) -> tensor<32x40x40xf32> + %1038 = math.exp %1037 : tensor<32x40x40xf32> + %1039 = tosa.reshape %1036 {new_shape = array} : (tensor<32x40x1xf32>) -> tensor<1x32x40xf32> + %1040 = tosa.reshape %1003 {new_shape = array} : (tensor<1x32x40x128xf32>) -> tensor<32x40x128xf32> + %1041 = tosa.matmul %1038, %1040 : (tensor<32x40x40xf32>, tensor<32x40x128xf32>) -> tensor<32x40x128xf32> + %1042 = tosa.reshape %1041 {new_shape = array} : (tensor<32x40x128xf32>) -> tensor<1x32x40x128xf32> + %1043 = "tosa.const"() <{value = dense<[0, 2, 1, 3]> : tensor<4xi32>}> : () -> tensor<4xi32> + %1044 = tosa.transpose %1042, %1043 : (tensor<1x32x40x128xf32>, tensor<4xi32>) -> tensor<1x40x32x128xf32> + %1045 = tosa.reshape %1044 {new_shape = array} : (tensor<1x40x32x128xf32>) -> tensor<1x40x4096xf32> + %1046 = "tosa.const"() <{value = dense<[1, 0]> : tensor<2xi32>}> : () -> tensor<2xi32> + %1047 = tosa.transpose %arg79, %1046 : (tensor<4096x4096xf32>, tensor<2xi32>) -> tensor<4096x4096xf32> + %1048 = tosa.reshape %1045 {new_shape = array} : (tensor<1x40x4096xf32>) -> tensor<40x4096xf32> + %cst_242 = arith.constant dense<0.000000e+00> : tensor<40x4096xf32> + %1049 = linalg.matmul {cast = #linalg.type_fn} ins(%1048, %1047 : tensor<40x4096xf32>, tensor<4096x4096xf32>) outs(%cst_242 : tensor<40x4096xf32>) -> tensor<40x4096xf32> + %1050 = tosa.reshape %1049 {new_shape = array} : (tensor<40x4096xf32>) -> tensor<1x40x4096xf32> + %1051 = tosa.add %967, %1050 : (tensor<1x40x4096xf32>, tensor<1x40x4096xf32>) -> tensor<1x40x4096xf32> + %1052 = tensor.empty() : tensor<1x40x4096xf32> + %c2_i32_243 = arith.constant 2 : i32 + %1053 = 
linalg.generic {indexing_maps = [#map4, #map4], iterator_types = ["parallel", "parallel", "parallel"]} ins(%1051 : tensor<1x40x4096xf32>) outs(%1052 : tensor<1x40x4096xf32>) { + ^bb0(%in: f32, %out: f32): + %3745 = math.fpowi %in, %c2_i32_243 : f32, i32 + linalg.yield %3745 : f32 + } -> tensor<1x40x4096xf32> + %1054 = tosa.reduce_sum %1053 {axis = 2 : i32} : (tensor<1x40x4096xf32>) -> tensor<1x40x1xf32> + %1055 = "tosa.const"() <{value = dense<4.096000e+03> : tensor<1xf32>}> : () -> tensor<1xf32> + %1056 = tosa.reciprocal %1055 : (tensor<1xf32>) -> tensor<1xf32> + %1057 = tosa.mul %1056, %1054 {shift = 0 : i8} : (tensor<1xf32>, tensor<1x40x1xf32>) -> tensor<1x40x1xf32> + %1058 = "tosa.const"() <{value = dense<9.99999974E-6> : tensor<1x40x1xf32>}> : () -> tensor<1x40x1xf32> + %1059 = tosa.add %1057, %1058 : (tensor<1x40x1xf32>, tensor<1x40x1xf32>) -> tensor<1x40x1xf32> + %1060 = tosa.rsqrt %1059 : (tensor<1x40x1xf32>) -> tensor<1x40x1xf32> + %1061 = tosa.mul %1051, %1060 {shift = 0 : i8} : (tensor<1x40x4096xf32>, tensor<1x40x1xf32>) -> tensor<1x40x4096xf32> + %1062 = tosa.reshape %arg80 {new_shape = array} : (tensor<4096xf32>) -> tensor<1x1x4096xf32> + %1063 = tosa.mul %1062, %1061 {shift = 0 : i8} : (tensor<1x1x4096xf32>, tensor<1x40x4096xf32>) -> tensor<1x40x4096xf32> + %1064 = "tosa.const"() <{value = dense<[1, 0]> : tensor<2xi32>}> : () -> tensor<2xi32> + %1065 = tosa.transpose %arg81, %1064 : (tensor<11008x4096xf32>, tensor<2xi32>) -> tensor<4096x11008xf32> + %1066 = tosa.reshape %1063 {new_shape = array} : (tensor<1x40x4096xf32>) -> tensor<40x4096xf32> + %cst_244 = arith.constant dense<0.000000e+00> : tensor<40x11008xf32> + %1067 = linalg.matmul {cast = #linalg.type_fn} ins(%1066, %1065 : tensor<40x4096xf32>, tensor<4096x11008xf32>) outs(%cst_244 : tensor<40x11008xf32>) -> tensor<40x11008xf32> + %1068 = tosa.reshape %1067 {new_shape = array} : (tensor<40x11008xf32>) -> tensor<1x40x11008xf32> + %1069 = tosa.sigmoid %1068 : (tensor<1x40x11008xf32>) -> 
tensor<1x40x11008xf32> + %1070 = tosa.mul %1068, %1069 {shift = 0 : i8} : (tensor<1x40x11008xf32>, tensor<1x40x11008xf32>) -> tensor<1x40x11008xf32> + %1071 = "tosa.const"() <{value = dense<[1, 0]> : tensor<2xi32>}> : () -> tensor<2xi32> + %1072 = tosa.transpose %arg82, %1071 : (tensor<11008x4096xf32>, tensor<2xi32>) -> tensor<4096x11008xf32> + %1073 = tosa.reshape %1063 {new_shape = array} : (tensor<1x40x4096xf32>) -> tensor<40x4096xf32> + %cst_245 = arith.constant dense<0.000000e+00> : tensor<40x11008xf32> + %1074 = linalg.matmul {cast = #linalg.type_fn} ins(%1073, %1072 : tensor<40x4096xf32>, tensor<4096x11008xf32>) outs(%cst_245 : tensor<40x11008xf32>) -> tensor<40x11008xf32> + %1075 = tosa.reshape %1074 {new_shape = array} : (tensor<40x11008xf32>) -> tensor<1x40x11008xf32> + %1076 = tosa.mul %1070, %1075 {shift = 0 : i8} : (tensor<1x40x11008xf32>, tensor<1x40x11008xf32>) -> tensor<1x40x11008xf32> + %1077 = "tosa.const"() <{value = dense<[1, 0]> : tensor<2xi32>}> : () -> tensor<2xi32> + %1078 = tosa.transpose %arg83, %1077 : (tensor<4096x11008xf32>, tensor<2xi32>) -> tensor<11008x4096xf32> + %1079 = tosa.reshape %1076 {new_shape = array} : (tensor<1x40x11008xf32>) -> tensor<40x11008xf32> + %cst_246 = arith.constant dense<0.000000e+00> : tensor<40x4096xf32> + %1080 = linalg.matmul {cast = #linalg.type_fn} ins(%1079, %1078 : tensor<40x11008xf32>, tensor<11008x4096xf32>) outs(%cst_246 : tensor<40x4096xf32>) -> tensor<40x4096xf32> + %1081 = tosa.reshape %1080 {new_shape = array} : (tensor<40x4096xf32>) -> tensor<1x40x4096xf32> + %1082 = tosa.add %1051, %1081 : (tensor<1x40x4096xf32>, tensor<1x40x4096xf32>) -> tensor<1x40x4096xf32> + %1083 = tensor.empty() : tensor<1x40x4096xf32> + %c2_i32_247 = arith.constant 2 : i32 + %1084 = linalg.generic {indexing_maps = [#map4, #map4], iterator_types = ["parallel", "parallel", "parallel"]} ins(%1082 : tensor<1x40x4096xf32>) outs(%1083 : tensor<1x40x4096xf32>) { + ^bb0(%in: f32, %out: f32): + %3745 = math.fpowi %in, %c2_i32_247 
: f32, i32 + linalg.yield %3745 : f32 + } -> tensor<1x40x4096xf32> + %1085 = tosa.reduce_sum %1084 {axis = 2 : i32} : (tensor<1x40x4096xf32>) -> tensor<1x40x1xf32> + %1086 = "tosa.const"() <{value = dense<4.096000e+03> : tensor<1xf32>}> : () -> tensor<1xf32> + %1087 = tosa.reciprocal %1086 : (tensor<1xf32>) -> tensor<1xf32> + %1088 = tosa.mul %1087, %1085 {shift = 0 : i8} : (tensor<1xf32>, tensor<1x40x1xf32>) -> tensor<1x40x1xf32> + %1089 = "tosa.const"() <{value = dense<9.99999974E-6> : tensor<1x40x1xf32>}> : () -> tensor<1x40x1xf32> + %1090 = tosa.add %1088, %1089 : (tensor<1x40x1xf32>, tensor<1x40x1xf32>) -> tensor<1x40x1xf32> + %1091 = tosa.rsqrt %1090 : (tensor<1x40x1xf32>) -> tensor<1x40x1xf32> + %1092 = tosa.mul %1082, %1091 {shift = 0 : i8} : (tensor<1x40x4096xf32>, tensor<1x40x1xf32>) -> tensor<1x40x4096xf32> + %1093 = tosa.reshape %arg84 {new_shape = array} : (tensor<4096xf32>) -> tensor<1x1x4096xf32> + %1094 = tosa.mul %1093, %1092 {shift = 0 : i8} : (tensor<1x1x4096xf32>, tensor<1x40x4096xf32>) -> tensor<1x40x4096xf32> + %1095 = "tosa.const"() <{value = dense<[1, 0]> : tensor<2xi32>}> : () -> tensor<2xi32> + %1096 = tosa.transpose %arg85, %1095 : (tensor<4096x4096xf32>, tensor<2xi32>) -> tensor<4096x4096xf32> + %1097 = tosa.reshape %1094 {new_shape = array} : (tensor<1x40x4096xf32>) -> tensor<40x4096xf32> + %cst_248 = arith.constant dense<0.000000e+00> : tensor<40x4096xf32> + %1098 = linalg.matmul {cast = #linalg.type_fn} ins(%1097, %1096 : tensor<40x4096xf32>, tensor<4096x4096xf32>) outs(%cst_248 : tensor<40x4096xf32>) -> tensor<40x4096xf32> + %1099 = tosa.reshape %1098 {new_shape = array} : (tensor<40x4096xf32>) -> tensor<1x40x4096xf32> + %1100 = "tosa.const"() <{value = dense<[1, 0]> : tensor<2xi32>}> : () -> tensor<2xi32> + %1101 = tosa.transpose %arg86, %1100 : (tensor<4096x4096xf32>, tensor<2xi32>) -> tensor<4096x4096xf32> + %1102 = tosa.reshape %1094 {new_shape = array} : (tensor<1x40x4096xf32>) -> tensor<40x4096xf32> + %cst_249 = arith.constant 
dense<0.000000e+00> : tensor<40x4096xf32> + %1103 = linalg.matmul {cast = #linalg.type_fn} ins(%1102, %1101 : tensor<40x4096xf32>, tensor<4096x4096xf32>) outs(%cst_249 : tensor<40x4096xf32>) -> tensor<40x4096xf32> + %1104 = tosa.reshape %1103 {new_shape = array} : (tensor<40x4096xf32>) -> tensor<1x40x4096xf32> + %1105 = "tosa.const"() <{value = dense<[1, 0]> : tensor<2xi32>}> : () -> tensor<2xi32> + %1106 = tosa.transpose %arg87, %1105 : (tensor<4096x4096xf32>, tensor<2xi32>) -> tensor<4096x4096xf32> + %1107 = tosa.reshape %1094 {new_shape = array} : (tensor<1x40x4096xf32>) -> tensor<40x4096xf32> + %cst_250 = arith.constant dense<0.000000e+00> : tensor<40x4096xf32> + %1108 = linalg.matmul {cast = #linalg.type_fn} ins(%1107, %1106 : tensor<40x4096xf32>, tensor<4096x4096xf32>) outs(%cst_250 : tensor<40x4096xf32>) -> tensor<40x4096xf32> + %1109 = tosa.reshape %1108 {new_shape = array} : (tensor<40x4096xf32>) -> tensor<1x40x4096xf32> + %1110 = tosa.reshape %1099 {new_shape = array} : (tensor<1x40x4096xf32>) -> tensor<1x40x32x128xf32> + %1111 = "tosa.const"() <{value = dense<[0, 2, 1, 3]> : tensor<4xi32>}> : () -> tensor<4xi32> + %1112 = tosa.transpose %1110, %1111 : (tensor<1x40x32x128xf32>, tensor<4xi32>) -> tensor<1x32x40x128xf32> + %1113 = tosa.reshape %1104 {new_shape = array} : (tensor<1x40x4096xf32>) -> tensor<1x40x32x128xf32> + %1114 = "tosa.const"() <{value = dense<[0, 2, 1, 3]> : tensor<4xi32>}> : () -> tensor<4xi32> + %1115 = tosa.transpose %1113, %1114 : (tensor<1x40x32x128xf32>, tensor<4xi32>) -> tensor<1x32x40x128xf32> + %1116 = tosa.reshape %1109 {new_shape = array} : (tensor<1x40x4096xf32>) -> tensor<1x40x32x128xf32> + %1117 = "tosa.const"() <{value = dense<[0, 2, 1, 3]> : tensor<4xi32>}> : () -> tensor<4xi32> + %1118 = tosa.transpose %1116, %1117 : (tensor<1x40x32x128xf32>, tensor<4xi32>) -> tensor<1x32x40x128xf32> + %1119 = tosa.reshape %45 {new_shape = array} : (tensor<1x40x128xf32>) -> tensor<1x1x40x128xf32> + %1120 = tosa.reshape %47 {new_shape = 
array} : (tensor<1x40x128xf32>) -> tensor<1x1x40x128xf32> + %1121 = tosa.mul %1112, %1119 {shift = 0 : i8} : (tensor<1x32x40x128xf32>, tensor<1x1x40x128xf32>) -> tensor<1x32x40x128xf32> + %extracted_slice_251 = tensor.extract_slice %1112[0, 0, 0, 0] [1, 32, 40, 64] [1, 1, 1, 1] : tensor<1x32x40x128xf32> to tensor<1x32x40x64xf32> + %extracted_slice_252 = tensor.extract_slice %1112[0, 0, 0, 64] [1, 32, 40, 64] [1, 1, 1, 1] : tensor<1x32x40x128xf32> to tensor<1x32x40x64xf32> + %1122 = tensor.empty() : tensor<1x32x40x64xf32> + %1123 = linalg.negf ins(%extracted_slice_252 : tensor<1x32x40x64xf32>) outs(%1122 : tensor<1x32x40x64xf32>) -> tensor<1x32x40x64xf32> + %1124 = tensor.empty() : tensor<1x32x40x128xf32> + %inserted_slice_253 = tensor.insert_slice %1123 into %1124[0, 0, 0, 0] [1, 32, 40, 64] [1, 1, 1, 1] : tensor<1x32x40x64xf32> into tensor<1x32x40x128xf32> + %inserted_slice_254 = tensor.insert_slice %extracted_slice_251 into %inserted_slice_253[0, 0, 0, 64] [1, 32, 40, 64] [1, 1, 1, 1] : tensor<1x32x40x64xf32> into tensor<1x32x40x128xf32> + %1125 = tosa.mul %inserted_slice_254, %1120 {shift = 0 : i8} : (tensor<1x32x40x128xf32>, tensor<1x1x40x128xf32>) -> tensor<1x32x40x128xf32> + %1126 = tosa.add %1121, %1125 : (tensor<1x32x40x128xf32>, tensor<1x32x40x128xf32>) -> tensor<1x32x40x128xf32> + %1127 = tosa.mul %1115, %1119 {shift = 0 : i8} : (tensor<1x32x40x128xf32>, tensor<1x1x40x128xf32>) -> tensor<1x32x40x128xf32> + %extracted_slice_255 = tensor.extract_slice %1115[0, 0, 0, 0] [1, 32, 40, 64] [1, 1, 1, 1] : tensor<1x32x40x128xf32> to tensor<1x32x40x64xf32> + %extracted_slice_256 = tensor.extract_slice %1115[0, 0, 0, 64] [1, 32, 40, 64] [1, 1, 1, 1] : tensor<1x32x40x128xf32> to tensor<1x32x40x64xf32> + %1128 = tensor.empty() : tensor<1x32x40x64xf32> + %1129 = linalg.negf ins(%extracted_slice_256 : tensor<1x32x40x64xf32>) outs(%1128 : tensor<1x32x40x64xf32>) -> tensor<1x32x40x64xf32> + %1130 = tensor.empty() : tensor<1x32x40x128xf32> + %inserted_slice_257 = 
tensor.insert_slice %1129 into %1130[0, 0, 0, 0] [1, 32, 40, 64] [1, 1, 1, 1] : tensor<1x32x40x64xf32> into tensor<1x32x40x128xf32> + %inserted_slice_258 = tensor.insert_slice %extracted_slice_255 into %inserted_slice_257[0, 0, 0, 64] [1, 32, 40, 64] [1, 1, 1, 1] : tensor<1x32x40x64xf32> into tensor<1x32x40x128xf32> + %1131 = tosa.mul %inserted_slice_258, %1120 {shift = 0 : i8} : (tensor<1x32x40x128xf32>, tensor<1x1x40x128xf32>) -> tensor<1x32x40x128xf32> + %1132 = tosa.add %1127, %1131 : (tensor<1x32x40x128xf32>, tensor<1x32x40x128xf32>) -> tensor<1x32x40x128xf32> + %1133 = tosa.reshape %19 {new_shape = array} : (tensor<40x41xf32>) -> tensor<1x40x41xf32> + %1134 = tosa.reshape %1133 {new_shape = array} : (tensor<1x40x41xf32>) -> tensor<1x1x40x41xf32> + %extracted_slice_259 = tensor.extract_slice %1134[0, 0, 0, 0] [1, 1, 40, 41] [1, 1, 1, 1] : tensor<1x1x40x41xf32> to tensor<1x1x40x41xf32> + %extracted_slice_260 = tensor.extract_slice %extracted_slice_259[0, 0, 0, 0] [1, 1, 40, 41] [1, 1, 1, 1] : tensor<1x1x40x41xf32> to tensor<1x1x40x41xf32> + %1135 = "tosa.const"() <{value = dense<0.000000e+00> : tensor<1x1x40x41xf32>}> : () -> tensor<1x1x40x41xf32> + %1136 = tosa.add %extracted_slice_260, %1135 : (tensor<1x1x40x41xf32>, tensor<1x1x40x41xf32>) -> tensor<1x1x40x41xf32> + %extracted_slice_261 = tensor.extract_slice %1136[0, 0, 0, 0] [1, 1, 40, 41] [1, 1, 1, 1] : tensor<1x1x40x41xf32> to tensor<1x1x40x41xf32> + %extracted_slice_262 = tensor.extract_slice %extracted_slice_261[0, 0, 0, 0] [1, 1, 40, 41] [1, 1, 1, 1] : tensor<1x1x40x41xf32> to tensor<1x1x40x41xf32> + %extracted_slice_263 = tensor.extract_slice %extracted_slice_262[0, 0, 0, 0] [1, 1, 40, 41] [1, 1, 1, 1] : tensor<1x1x40x41xf32> to tensor<1x1x40x41xf32> + %extracted_slice_264 = tensor.extract_slice %extracted_slice_263[0, 0, 0, 0] [1, 1, 40, 40] [1, 1, 1, 1] : tensor<1x1x40x41xf32> to tensor<1x1x40x40xf32> + %cst_265 = arith.constant 0.000000e+00 : f32 + %splat_266 = tensor.splat %cst_265 : 
tensor<40x40xf32> + %1137 = tosa.reshape %extracted_slice_264 {new_shape = array} : (tensor<1x1x40x40xf32>) -> tensor<40x40xf32> + %1138 = tosa.add %splat_266, %1137 : (tensor<40x40xf32>, tensor<40x40xf32>) -> tensor<40x40xf32> + %1139 = "tosa.const"() <{value = dense<[0, 1, 3, 2]> : tensor<4xi32>}> : () -> tensor<4xi32> + %1140 = tosa.transpose %1132, %1139 : (tensor<1x32x40x128xf32>, tensor<4xi32>) -> tensor<1x32x128x40xf32> + %1141 = tosa.reshape %1126 {new_shape = array} : (tensor<1x32x40x128xf32>) -> tensor<32x40x128xf32> + %1142 = tosa.reshape %1140 {new_shape = array} : (tensor<1x32x128x40xf32>) -> tensor<32x128x40xf32> + %1143 = tosa.matmul %1141, %1142 : (tensor<32x40x128xf32>, tensor<32x128x40xf32>) -> tensor<32x40x40xf32> + %cst_267 = arith.constant 0.0883883461 : f32 + %splat_268 = tensor.splat %cst_267 : tensor<32x40x40xf32> + %1144 = tosa.mul %1143, %splat_268 {shift = 0 : i8} : (tensor<32x40x40xf32>, tensor<32x40x40xf32>) -> tensor<32x40x40xf32> + %1145 = tosa.add %1144, %1138 : (tensor<32x40x40xf32>, tensor<40x40xf32>) -> tensor<32x40x40xf32> + %1146 = tosa.reduce_max %1145 {axis = 2 : i32} : (tensor<32x40x40xf32>) -> tensor<32x40x1xf32> + %1147 = tosa.sub %1145, %1146 : (tensor<32x40x40xf32>, tensor<32x40x1xf32>) -> tensor<32x40x40xf32> + %1148 = math.exp %1147 : tensor<32x40x40xf32> + %1149 = tosa.reduce_sum %1148 {axis = 2 : i32} : (tensor<32x40x40xf32>) -> tensor<32x40x1xf32> + %1150 = tosa.log %1149 : (tensor<32x40x1xf32>) -> tensor<32x40x1xf32> + %1151 = tosa.add %1146, %1150 : (tensor<32x40x1xf32>, tensor<32x40x1xf32>) -> tensor<32x40x1xf32> + %1152 = tosa.sub %1145, %1151 : (tensor<32x40x40xf32>, tensor<32x40x1xf32>) -> tensor<32x40x40xf32> + %1153 = math.exp %1152 : tensor<32x40x40xf32> + %1154 = tosa.reshape %1151 {new_shape = array} : (tensor<32x40x1xf32>) -> tensor<1x32x40xf32> + %1155 = tosa.reshape %1118 {new_shape = array} : (tensor<1x32x40x128xf32>) -> tensor<32x40x128xf32> + %1156 = tosa.matmul %1153, %1155 : (tensor<32x40x40xf32>, 
tensor<32x40x128xf32>) -> tensor<32x40x128xf32> + %1157 = tosa.reshape %1156 {new_shape = array} : (tensor<32x40x128xf32>) -> tensor<1x32x40x128xf32> + %1158 = "tosa.const"() <{value = dense<[0, 2, 1, 3]> : tensor<4xi32>}> : () -> tensor<4xi32> + %1159 = tosa.transpose %1157, %1158 : (tensor<1x32x40x128xf32>, tensor<4xi32>) -> tensor<1x40x32x128xf32> + %1160 = tosa.reshape %1159 {new_shape = array} : (tensor<1x40x32x128xf32>) -> tensor<1x40x4096xf32> + %1161 = "tosa.const"() <{value = dense<[1, 0]> : tensor<2xi32>}> : () -> tensor<2xi32> + %1162 = tosa.transpose %arg88, %1161 : (tensor<4096x4096xf32>, tensor<2xi32>) -> tensor<4096x4096xf32> + %1163 = tosa.reshape %1160 {new_shape = array} : (tensor<1x40x4096xf32>) -> tensor<40x4096xf32> + %cst_269 = arith.constant dense<0.000000e+00> : tensor<40x4096xf32> + %1164 = linalg.matmul {cast = #linalg.type_fn} ins(%1163, %1162 : tensor<40x4096xf32>, tensor<4096x4096xf32>) outs(%cst_269 : tensor<40x4096xf32>) -> tensor<40x4096xf32> + %1165 = tosa.reshape %1164 {new_shape = array} : (tensor<40x4096xf32>) -> tensor<1x40x4096xf32> + %1166 = tosa.add %1082, %1165 : (tensor<1x40x4096xf32>, tensor<1x40x4096xf32>) -> tensor<1x40x4096xf32> + %1167 = tensor.empty() : tensor<1x40x4096xf32> + %c2_i32_270 = arith.constant 2 : i32 + %1168 = linalg.generic {indexing_maps = [#map4, #map4], iterator_types = ["parallel", "parallel", "parallel"]} ins(%1166 : tensor<1x40x4096xf32>) outs(%1167 : tensor<1x40x4096xf32>) { + ^bb0(%in: f32, %out: f32): + %3745 = math.fpowi %in, %c2_i32_270 : f32, i32 + linalg.yield %3745 : f32 + } -> tensor<1x40x4096xf32> + %1169 = tosa.reduce_sum %1168 {axis = 2 : i32} : (tensor<1x40x4096xf32>) -> tensor<1x40x1xf32> + %1170 = "tosa.const"() <{value = dense<4.096000e+03> : tensor<1xf32>}> : () -> tensor<1xf32> + %1171 = tosa.reciprocal %1170 : (tensor<1xf32>) -> tensor<1xf32> + %1172 = tosa.mul %1171, %1169 {shift = 0 : i8} : (tensor<1xf32>, tensor<1x40x1xf32>) -> tensor<1x40x1xf32> + %1173 = "tosa.const"() 
<{value = dense<9.99999974E-6> : tensor<1x40x1xf32>}> : () -> tensor<1x40x1xf32> + %1174 = tosa.add %1172, %1173 : (tensor<1x40x1xf32>, tensor<1x40x1xf32>) -> tensor<1x40x1xf32> + %1175 = tosa.rsqrt %1174 : (tensor<1x40x1xf32>) -> tensor<1x40x1xf32> + %1176 = tosa.mul %1166, %1175 {shift = 0 : i8} : (tensor<1x40x4096xf32>, tensor<1x40x1xf32>) -> tensor<1x40x4096xf32> + %1177 = tosa.reshape %arg89 {new_shape = array} : (tensor<4096xf32>) -> tensor<1x1x4096xf32> + %1178 = tosa.mul %1177, %1176 {shift = 0 : i8} : (tensor<1x1x4096xf32>, tensor<1x40x4096xf32>) -> tensor<1x40x4096xf32> + %1179 = "tosa.const"() <{value = dense<[1, 0]> : tensor<2xi32>}> : () -> tensor<2xi32> + %1180 = tosa.transpose %arg90, %1179 : (tensor<11008x4096xf32>, tensor<2xi32>) -> tensor<4096x11008xf32> + %1181 = tosa.reshape %1178 {new_shape = array} : (tensor<1x40x4096xf32>) -> tensor<40x4096xf32> + %cst_271 = arith.constant dense<0.000000e+00> : tensor<40x11008xf32> + %1182 = linalg.matmul {cast = #linalg.type_fn} ins(%1181, %1180 : tensor<40x4096xf32>, tensor<4096x11008xf32>) outs(%cst_271 : tensor<40x11008xf32>) -> tensor<40x11008xf32> + %1183 = tosa.reshape %1182 {new_shape = array} : (tensor<40x11008xf32>) -> tensor<1x40x11008xf32> + %1184 = tosa.sigmoid %1183 : (tensor<1x40x11008xf32>) -> tensor<1x40x11008xf32> + %1185 = tosa.mul %1183, %1184 {shift = 0 : i8} : (tensor<1x40x11008xf32>, tensor<1x40x11008xf32>) -> tensor<1x40x11008xf32> + %1186 = "tosa.const"() <{value = dense<[1, 0]> : tensor<2xi32>}> : () -> tensor<2xi32> + %1187 = tosa.transpose %arg91, %1186 : (tensor<11008x4096xf32>, tensor<2xi32>) -> tensor<4096x11008xf32> + %1188 = tosa.reshape %1178 {new_shape = array} : (tensor<1x40x4096xf32>) -> tensor<40x4096xf32> + %cst_272 = arith.constant dense<0.000000e+00> : tensor<40x11008xf32> + %1189 = linalg.matmul {cast = #linalg.type_fn} ins(%1188, %1187 : tensor<40x4096xf32>, tensor<4096x11008xf32>) outs(%cst_272 : tensor<40x11008xf32>) -> tensor<40x11008xf32> + %1190 = tosa.reshape 
%1189 {new_shape = array} : (tensor<40x11008xf32>) -> tensor<1x40x11008xf32> + %1191 = tosa.mul %1185, %1190 {shift = 0 : i8} : (tensor<1x40x11008xf32>, tensor<1x40x11008xf32>) -> tensor<1x40x11008xf32> + %1192 = "tosa.const"() <{value = dense<[1, 0]> : tensor<2xi32>}> : () -> tensor<2xi32> + %1193 = tosa.transpose %arg92, %1192 : (tensor<4096x11008xf32>, tensor<2xi32>) -> tensor<11008x4096xf32> + %1194 = tosa.reshape %1191 {new_shape = array} : (tensor<1x40x11008xf32>) -> tensor<40x11008xf32> + %cst_273 = arith.constant dense<0.000000e+00> : tensor<40x4096xf32> + %1195 = linalg.matmul {cast = #linalg.type_fn} ins(%1194, %1193 : tensor<40x11008xf32>, tensor<11008x4096xf32>) outs(%cst_273 : tensor<40x4096xf32>) -> tensor<40x4096xf32> + %1196 = tosa.reshape %1195 {new_shape = array} : (tensor<40x4096xf32>) -> tensor<1x40x4096xf32> + %1197 = tosa.add %1166, %1196 : (tensor<1x40x4096xf32>, tensor<1x40x4096xf32>) -> tensor<1x40x4096xf32> + %1198 = tensor.empty() : tensor<1x40x4096xf32> + %c2_i32_274 = arith.constant 2 : i32 + %1199 = linalg.generic {indexing_maps = [#map4, #map4], iterator_types = ["parallel", "parallel", "parallel"]} ins(%1197 : tensor<1x40x4096xf32>) outs(%1198 : tensor<1x40x4096xf32>) { + ^bb0(%in: f32, %out: f32): + %3745 = math.fpowi %in, %c2_i32_274 : f32, i32 + linalg.yield %3745 : f32 + } -> tensor<1x40x4096xf32> + %1200 = tosa.reduce_sum %1199 {axis = 2 : i32} : (tensor<1x40x4096xf32>) -> tensor<1x40x1xf32> + %1201 = "tosa.const"() <{value = dense<4.096000e+03> : tensor<1xf32>}> : () -> tensor<1xf32> + %1202 = tosa.reciprocal %1201 : (tensor<1xf32>) -> tensor<1xf32> + %1203 = tosa.mul %1202, %1200 {shift = 0 : i8} : (tensor<1xf32>, tensor<1x40x1xf32>) -> tensor<1x40x1xf32> + %1204 = "tosa.const"() <{value = dense<9.99999974E-6> : tensor<1x40x1xf32>}> : () -> tensor<1x40x1xf32> + %1205 = tosa.add %1203, %1204 : (tensor<1x40x1xf32>, tensor<1x40x1xf32>) -> tensor<1x40x1xf32> + %1206 = tosa.rsqrt %1205 : (tensor<1x40x1xf32>) -> tensor<1x40x1xf32> + 
%1207 = tosa.mul %1197, %1206 {shift = 0 : i8} : (tensor<1x40x4096xf32>, tensor<1x40x1xf32>) -> tensor<1x40x4096xf32> + %1208 = tosa.reshape %arg93 {new_shape = array} : (tensor<4096xf32>) -> tensor<1x1x4096xf32> + %1209 = tosa.mul %1208, %1207 {shift = 0 : i8} : (tensor<1x1x4096xf32>, tensor<1x40x4096xf32>) -> tensor<1x40x4096xf32> + %1210 = "tosa.const"() <{value = dense<[1, 0]> : tensor<2xi32>}> : () -> tensor<2xi32> + %1211 = tosa.transpose %arg94, %1210 : (tensor<4096x4096xf32>, tensor<2xi32>) -> tensor<4096x4096xf32> + %1212 = tosa.reshape %1209 {new_shape = array} : (tensor<1x40x4096xf32>) -> tensor<40x4096xf32> + %cst_275 = arith.constant dense<0.000000e+00> : tensor<40x4096xf32> + %1213 = linalg.matmul {cast = #linalg.type_fn} ins(%1212, %1211 : tensor<40x4096xf32>, tensor<4096x4096xf32>) outs(%cst_275 : tensor<40x4096xf32>) -> tensor<40x4096xf32> + %1214 = tosa.reshape %1213 {new_shape = array} : (tensor<40x4096xf32>) -> tensor<1x40x4096xf32> + %1215 = "tosa.const"() <{value = dense<[1, 0]> : tensor<2xi32>}> : () -> tensor<2xi32> + %1216 = tosa.transpose %arg95, %1215 : (tensor<4096x4096xf32>, tensor<2xi32>) -> tensor<4096x4096xf32> + %1217 = tosa.reshape %1209 {new_shape = array} : (tensor<1x40x4096xf32>) -> tensor<40x4096xf32> + %cst_276 = arith.constant dense<0.000000e+00> : tensor<40x4096xf32> + %1218 = linalg.matmul {cast = #linalg.type_fn} ins(%1217, %1216 : tensor<40x4096xf32>, tensor<4096x4096xf32>) outs(%cst_276 : tensor<40x4096xf32>) -> tensor<40x4096xf32> + %1219 = tosa.reshape %1218 {new_shape = array} : (tensor<40x4096xf32>) -> tensor<1x40x4096xf32> + %1220 = "tosa.const"() <{value = dense<[1, 0]> : tensor<2xi32>}> : () -> tensor<2xi32> + %1221 = tosa.transpose %arg96, %1220 : (tensor<4096x4096xf32>, tensor<2xi32>) -> tensor<4096x4096xf32> + %1222 = tosa.reshape %1209 {new_shape = array} : (tensor<1x40x4096xf32>) -> tensor<40x4096xf32> + %cst_277 = arith.constant dense<0.000000e+00> : tensor<40x4096xf32> + %1223 = linalg.matmul {cast = 
#linalg.type_fn} ins(%1222, %1221 : tensor<40x4096xf32>, tensor<4096x4096xf32>) outs(%cst_277 : tensor<40x4096xf32>) -> tensor<40x4096xf32> + %1224 = tosa.reshape %1223 {new_shape = array} : (tensor<40x4096xf32>) -> tensor<1x40x4096xf32> + %1225 = tosa.reshape %1214 {new_shape = array} : (tensor<1x40x4096xf32>) -> tensor<1x40x32x128xf32> + %1226 = "tosa.const"() <{value = dense<[0, 2, 1, 3]> : tensor<4xi32>}> : () -> tensor<4xi32> + %1227 = tosa.transpose %1225, %1226 : (tensor<1x40x32x128xf32>, tensor<4xi32>) -> tensor<1x32x40x128xf32> + %1228 = tosa.reshape %1219 {new_shape = array} : (tensor<1x40x4096xf32>) -> tensor<1x40x32x128xf32> + %1229 = "tosa.const"() <{value = dense<[0, 2, 1, 3]> : tensor<4xi32>}> : () -> tensor<4xi32> + %1230 = tosa.transpose %1228, %1229 : (tensor<1x40x32x128xf32>, tensor<4xi32>) -> tensor<1x32x40x128xf32> + %1231 = tosa.reshape %1224 {new_shape = array} : (tensor<1x40x4096xf32>) -> tensor<1x40x32x128xf32> + %1232 = "tosa.const"() <{value = dense<[0, 2, 1, 3]> : tensor<4xi32>}> : () -> tensor<4xi32> + %1233 = tosa.transpose %1231, %1232 : (tensor<1x40x32x128xf32>, tensor<4xi32>) -> tensor<1x32x40x128xf32> + %1234 = tosa.reshape %45 {new_shape = array} : (tensor<1x40x128xf32>) -> tensor<1x1x40x128xf32> + %1235 = tosa.reshape %47 {new_shape = array} : (tensor<1x40x128xf32>) -> tensor<1x1x40x128xf32> + %1236 = tosa.mul %1227, %1234 {shift = 0 : i8} : (tensor<1x32x40x128xf32>, tensor<1x1x40x128xf32>) -> tensor<1x32x40x128xf32> + %extracted_slice_278 = tensor.extract_slice %1227[0, 0, 0, 0] [1, 32, 40, 64] [1, 1, 1, 1] : tensor<1x32x40x128xf32> to tensor<1x32x40x64xf32> + %extracted_slice_279 = tensor.extract_slice %1227[0, 0, 0, 64] [1, 32, 40, 64] [1, 1, 1, 1] : tensor<1x32x40x128xf32> to tensor<1x32x40x64xf32> + %1237 = tensor.empty() : tensor<1x32x40x64xf32> + %1238 = linalg.negf ins(%extracted_slice_279 : tensor<1x32x40x64xf32>) outs(%1237 : tensor<1x32x40x64xf32>) -> tensor<1x32x40x64xf32> + %1239 = tensor.empty() : 
tensor<1x32x40x128xf32> + %inserted_slice_280 = tensor.insert_slice %1238 into %1239[0, 0, 0, 0] [1, 32, 40, 64] [1, 1, 1, 1] : tensor<1x32x40x64xf32> into tensor<1x32x40x128xf32> + %inserted_slice_281 = tensor.insert_slice %extracted_slice_278 into %inserted_slice_280[0, 0, 0, 64] [1, 32, 40, 64] [1, 1, 1, 1] : tensor<1x32x40x64xf32> into tensor<1x32x40x128xf32> + %1240 = tosa.mul %inserted_slice_281, %1235 {shift = 0 : i8} : (tensor<1x32x40x128xf32>, tensor<1x1x40x128xf32>) -> tensor<1x32x40x128xf32> + %1241 = tosa.add %1236, %1240 : (tensor<1x32x40x128xf32>, tensor<1x32x40x128xf32>) -> tensor<1x32x40x128xf32> + %1242 = tosa.mul %1230, %1234 {shift = 0 : i8} : (tensor<1x32x40x128xf32>, tensor<1x1x40x128xf32>) -> tensor<1x32x40x128xf32> + %extracted_slice_282 = tensor.extract_slice %1230[0, 0, 0, 0] [1, 32, 40, 64] [1, 1, 1, 1] : tensor<1x32x40x128xf32> to tensor<1x32x40x64xf32> + %extracted_slice_283 = tensor.extract_slice %1230[0, 0, 0, 64] [1, 32, 40, 64] [1, 1, 1, 1] : tensor<1x32x40x128xf32> to tensor<1x32x40x64xf32> + %1243 = tensor.empty() : tensor<1x32x40x64xf32> + %1244 = linalg.negf ins(%extracted_slice_283 : tensor<1x32x40x64xf32>) outs(%1243 : tensor<1x32x40x64xf32>) -> tensor<1x32x40x64xf32> + %1245 = tensor.empty() : tensor<1x32x40x128xf32> + %inserted_slice_284 = tensor.insert_slice %1244 into %1245[0, 0, 0, 0] [1, 32, 40, 64] [1, 1, 1, 1] : tensor<1x32x40x64xf32> into tensor<1x32x40x128xf32> + %inserted_slice_285 = tensor.insert_slice %extracted_slice_282 into %inserted_slice_284[0, 0, 0, 64] [1, 32, 40, 64] [1, 1, 1, 1] : tensor<1x32x40x64xf32> into tensor<1x32x40x128xf32> + %1246 = tosa.mul %inserted_slice_285, %1235 {shift = 0 : i8} : (tensor<1x32x40x128xf32>, tensor<1x1x40x128xf32>) -> tensor<1x32x40x128xf32> + %1247 = tosa.add %1242, %1246 : (tensor<1x32x40x128xf32>, tensor<1x32x40x128xf32>) -> tensor<1x32x40x128xf32> + %1248 = tosa.reshape %19 {new_shape = array} : (tensor<40x41xf32>) -> tensor<1x40x41xf32> + %1249 = tosa.reshape %1248 
{new_shape = array} : (tensor<1x40x41xf32>) -> tensor<1x1x40x41xf32> + %extracted_slice_286 = tensor.extract_slice %1249[0, 0, 0, 0] [1, 1, 40, 41] [1, 1, 1, 1] : tensor<1x1x40x41xf32> to tensor<1x1x40x41xf32> + %extracted_slice_287 = tensor.extract_slice %extracted_slice_286[0, 0, 0, 0] [1, 1, 40, 41] [1, 1, 1, 1] : tensor<1x1x40x41xf32> to tensor<1x1x40x41xf32> + %1250 = "tosa.const"() <{value = dense<0.000000e+00> : tensor<1x1x40x41xf32>}> : () -> tensor<1x1x40x41xf32> + %1251 = tosa.add %extracted_slice_287, %1250 : (tensor<1x1x40x41xf32>, tensor<1x1x40x41xf32>) -> tensor<1x1x40x41xf32> + %extracted_slice_288 = tensor.extract_slice %1251[0, 0, 0, 0] [1, 1, 40, 41] [1, 1, 1, 1] : tensor<1x1x40x41xf32> to tensor<1x1x40x41xf32> + %extracted_slice_289 = tensor.extract_slice %extracted_slice_288[0, 0, 0, 0] [1, 1, 40, 41] [1, 1, 1, 1] : tensor<1x1x40x41xf32> to tensor<1x1x40x41xf32> + %extracted_slice_290 = tensor.extract_slice %extracted_slice_289[0, 0, 0, 0] [1, 1, 40, 41] [1, 1, 1, 1] : tensor<1x1x40x41xf32> to tensor<1x1x40x41xf32> + %extracted_slice_291 = tensor.extract_slice %extracted_slice_290[0, 0, 0, 0] [1, 1, 40, 40] [1, 1, 1, 1] : tensor<1x1x40x41xf32> to tensor<1x1x40x40xf32> + %cst_292 = arith.constant 0.000000e+00 : f32 + %splat_293 = tensor.splat %cst_292 : tensor<40x40xf32> + %1252 = tosa.reshape %extracted_slice_291 {new_shape = array} : (tensor<1x1x40x40xf32>) -> tensor<40x40xf32> + %1253 = tosa.add %splat_293, %1252 : (tensor<40x40xf32>, tensor<40x40xf32>) -> tensor<40x40xf32> + %1254 = "tosa.const"() <{value = dense<[0, 1, 3, 2]> : tensor<4xi32>}> : () -> tensor<4xi32> + %1255 = tosa.transpose %1247, %1254 : (tensor<1x32x40x128xf32>, tensor<4xi32>) -> tensor<1x32x128x40xf32> + %1256 = tosa.reshape %1241 {new_shape = array} : (tensor<1x32x40x128xf32>) -> tensor<32x40x128xf32> + %1257 = tosa.reshape %1255 {new_shape = array} : (tensor<1x32x128x40xf32>) -> tensor<32x128x40xf32> + %1258 = tosa.matmul %1256, %1257 : (tensor<32x40x128xf32>, 
tensor<32x128x40xf32>) -> tensor<32x40x40xf32> + %cst_294 = arith.constant 0.0883883461 : f32 + %splat_295 = tensor.splat %cst_294 : tensor<32x40x40xf32> + %1259 = tosa.mul %1258, %splat_295 {shift = 0 : i8} : (tensor<32x40x40xf32>, tensor<32x40x40xf32>) -> tensor<32x40x40xf32> + %1260 = tosa.add %1259, %1253 : (tensor<32x40x40xf32>, tensor<40x40xf32>) -> tensor<32x40x40xf32> + %1261 = tosa.reduce_max %1260 {axis = 2 : i32} : (tensor<32x40x40xf32>) -> tensor<32x40x1xf32> + %1262 = tosa.sub %1260, %1261 : (tensor<32x40x40xf32>, tensor<32x40x1xf32>) -> tensor<32x40x40xf32> + %1263 = math.exp %1262 : tensor<32x40x40xf32> + %1264 = tosa.reduce_sum %1263 {axis = 2 : i32} : (tensor<32x40x40xf32>) -> tensor<32x40x1xf32> + %1265 = tosa.log %1264 : (tensor<32x40x1xf32>) -> tensor<32x40x1xf32> + %1266 = tosa.add %1261, %1265 : (tensor<32x40x1xf32>, tensor<32x40x1xf32>) -> tensor<32x40x1xf32> + %1267 = tosa.sub %1260, %1266 : (tensor<32x40x40xf32>, tensor<32x40x1xf32>) -> tensor<32x40x40xf32> + %1268 = math.exp %1267 : tensor<32x40x40xf32> + %1269 = tosa.reshape %1266 {new_shape = array} : (tensor<32x40x1xf32>) -> tensor<1x32x40xf32> + %1270 = tosa.reshape %1233 {new_shape = array} : (tensor<1x32x40x128xf32>) -> tensor<32x40x128xf32> + %1271 = tosa.matmul %1268, %1270 : (tensor<32x40x40xf32>, tensor<32x40x128xf32>) -> tensor<32x40x128xf32> + %1272 = tosa.reshape %1271 {new_shape = array} : (tensor<32x40x128xf32>) -> tensor<1x32x40x128xf32> + %1273 = "tosa.const"() <{value = dense<[0, 2, 1, 3]> : tensor<4xi32>}> : () -> tensor<4xi32> + %1274 = tosa.transpose %1272, %1273 : (tensor<1x32x40x128xf32>, tensor<4xi32>) -> tensor<1x40x32x128xf32> + %1275 = tosa.reshape %1274 {new_shape = array} : (tensor<1x40x32x128xf32>) -> tensor<1x40x4096xf32> + %1276 = "tosa.const"() <{value = dense<[1, 0]> : tensor<2xi32>}> : () -> tensor<2xi32> + %1277 = tosa.transpose %arg97, %1276 : (tensor<4096x4096xf32>, tensor<2xi32>) -> tensor<4096x4096xf32> + %1278 = tosa.reshape %1275 {new_shape = 
array} : (tensor<1x40x4096xf32>) -> tensor<40x4096xf32> + %cst_296 = arith.constant dense<0.000000e+00> : tensor<40x4096xf32> + %1279 = linalg.matmul {cast = #linalg.type_fn} ins(%1278, %1277 : tensor<40x4096xf32>, tensor<4096x4096xf32>) outs(%cst_296 : tensor<40x4096xf32>) -> tensor<40x4096xf32> + %1280 = tosa.reshape %1279 {new_shape = array} : (tensor<40x4096xf32>) -> tensor<1x40x4096xf32> + %1281 = tosa.add %1197, %1280 : (tensor<1x40x4096xf32>, tensor<1x40x4096xf32>) -> tensor<1x40x4096xf32> + %1282 = tensor.empty() : tensor<1x40x4096xf32> + %c2_i32_297 = arith.constant 2 : i32 + %1283 = linalg.generic {indexing_maps = [#map4, #map4], iterator_types = ["parallel", "parallel", "parallel"]} ins(%1281 : tensor<1x40x4096xf32>) outs(%1282 : tensor<1x40x4096xf32>) { + ^bb0(%in: f32, %out: f32): + %3745 = math.fpowi %in, %c2_i32_297 : f32, i32 + linalg.yield %3745 : f32 + } -> tensor<1x40x4096xf32> + %1284 = tosa.reduce_sum %1283 {axis = 2 : i32} : (tensor<1x40x4096xf32>) -> tensor<1x40x1xf32> + %1285 = "tosa.const"() <{value = dense<4.096000e+03> : tensor<1xf32>}> : () -> tensor<1xf32> + %1286 = tosa.reciprocal %1285 : (tensor<1xf32>) -> tensor<1xf32> + %1287 = tosa.mul %1286, %1284 {shift = 0 : i8} : (tensor<1xf32>, tensor<1x40x1xf32>) -> tensor<1x40x1xf32> + %1288 = "tosa.const"() <{value = dense<9.99999974E-6> : tensor<1x40x1xf32>}> : () -> tensor<1x40x1xf32> + %1289 = tosa.add %1287, %1288 : (tensor<1x40x1xf32>, tensor<1x40x1xf32>) -> tensor<1x40x1xf32> + %1290 = tosa.rsqrt %1289 : (tensor<1x40x1xf32>) -> tensor<1x40x1xf32> + %1291 = tosa.mul %1281, %1290 {shift = 0 : i8} : (tensor<1x40x4096xf32>, tensor<1x40x1xf32>) -> tensor<1x40x4096xf32> + %1292 = tosa.reshape %arg98 {new_shape = array} : (tensor<4096xf32>) -> tensor<1x1x4096xf32> + %1293 = tosa.mul %1292, %1291 {shift = 0 : i8} : (tensor<1x1x4096xf32>, tensor<1x40x4096xf32>) -> tensor<1x40x4096xf32> + %1294 = "tosa.const"() <{value = dense<[1, 0]> : tensor<2xi32>}> : () -> tensor<2xi32> + %1295 = 
tosa.transpose %arg99, %1294 : (tensor<11008x4096xf32>, tensor<2xi32>) -> tensor<4096x11008xf32> + %1296 = tosa.reshape %1293 {new_shape = array} : (tensor<1x40x4096xf32>) -> tensor<40x4096xf32> + %cst_298 = arith.constant dense<0.000000e+00> : tensor<40x11008xf32> + %1297 = linalg.matmul {cast = #linalg.type_fn} ins(%1296, %1295 : tensor<40x4096xf32>, tensor<4096x11008xf32>) outs(%cst_298 : tensor<40x11008xf32>) -> tensor<40x11008xf32> + %1298 = tosa.reshape %1297 {new_shape = array} : (tensor<40x11008xf32>) -> tensor<1x40x11008xf32> + %1299 = tosa.sigmoid %1298 : (tensor<1x40x11008xf32>) -> tensor<1x40x11008xf32> + %1300 = tosa.mul %1298, %1299 {shift = 0 : i8} : (tensor<1x40x11008xf32>, tensor<1x40x11008xf32>) -> tensor<1x40x11008xf32> + %1301 = "tosa.const"() <{value = dense<[1, 0]> : tensor<2xi32>}> : () -> tensor<2xi32> + %1302 = tosa.transpose %arg100, %1301 : (tensor<11008x4096xf32>, tensor<2xi32>) -> tensor<4096x11008xf32> + %1303 = tosa.reshape %1293 {new_shape = array} : (tensor<1x40x4096xf32>) -> tensor<40x4096xf32> + %cst_299 = arith.constant dense<0.000000e+00> : tensor<40x11008xf32> + %1304 = linalg.matmul {cast = #linalg.type_fn} ins(%1303, %1302 : tensor<40x4096xf32>, tensor<4096x11008xf32>) outs(%cst_299 : tensor<40x11008xf32>) -> tensor<40x11008xf32> + %1305 = tosa.reshape %1304 {new_shape = array} : (tensor<40x11008xf32>) -> tensor<1x40x11008xf32> + %1306 = tosa.mul %1300, %1305 {shift = 0 : i8} : (tensor<1x40x11008xf32>, tensor<1x40x11008xf32>) -> tensor<1x40x11008xf32> + %1307 = "tosa.const"() <{value = dense<[1, 0]> : tensor<2xi32>}> : () -> tensor<2xi32> + %1308 = tosa.transpose %arg101, %1307 : (tensor<4096x11008xf32>, tensor<2xi32>) -> tensor<11008x4096xf32> + %1309 = tosa.reshape %1306 {new_shape = array} : (tensor<1x40x11008xf32>) -> tensor<40x11008xf32> + %cst_300 = arith.constant dense<0.000000e+00> : tensor<40x4096xf32> + %1310 = linalg.matmul {cast = #linalg.type_fn} ins(%1309, %1308 : tensor<40x11008xf32>, tensor<11008x4096xf32>) 
outs(%cst_300 : tensor<40x4096xf32>) -> tensor<40x4096xf32> + %1311 = tosa.reshape %1310 {new_shape = array} : (tensor<40x4096xf32>) -> tensor<1x40x4096xf32> + %1312 = tosa.add %1281, %1311 : (tensor<1x40x4096xf32>, tensor<1x40x4096xf32>) -> tensor<1x40x4096xf32> + %1313 = tensor.empty() : tensor<1x40x4096xf32> + %c2_i32_301 = arith.constant 2 : i32 + %1314 = linalg.generic {indexing_maps = [#map4, #map4], iterator_types = ["parallel", "parallel", "parallel"]} ins(%1312 : tensor<1x40x4096xf32>) outs(%1313 : tensor<1x40x4096xf32>) { + ^bb0(%in: f32, %out: f32): + %3745 = math.fpowi %in, %c2_i32_301 : f32, i32 + linalg.yield %3745 : f32 + } -> tensor<1x40x4096xf32> + %1315 = tosa.reduce_sum %1314 {axis = 2 : i32} : (tensor<1x40x4096xf32>) -> tensor<1x40x1xf32> + %1316 = "tosa.const"() <{value = dense<4.096000e+03> : tensor<1xf32>}> : () -> tensor<1xf32> + %1317 = tosa.reciprocal %1316 : (tensor<1xf32>) -> tensor<1xf32> + %1318 = tosa.mul %1317, %1315 {shift = 0 : i8} : (tensor<1xf32>, tensor<1x40x1xf32>) -> tensor<1x40x1xf32> + %1319 = "tosa.const"() <{value = dense<9.99999974E-6> : tensor<1x40x1xf32>}> : () -> tensor<1x40x1xf32> + %1320 = tosa.add %1318, %1319 : (tensor<1x40x1xf32>, tensor<1x40x1xf32>) -> tensor<1x40x1xf32> + %1321 = tosa.rsqrt %1320 : (tensor<1x40x1xf32>) -> tensor<1x40x1xf32> + %1322 = tosa.mul %1312, %1321 {shift = 0 : i8} : (tensor<1x40x4096xf32>, tensor<1x40x1xf32>) -> tensor<1x40x4096xf32> + %1323 = tosa.reshape %arg102 {new_shape = array} : (tensor<4096xf32>) -> tensor<1x1x4096xf32> + %1324 = tosa.mul %1323, %1322 {shift = 0 : i8} : (tensor<1x1x4096xf32>, tensor<1x40x4096xf32>) -> tensor<1x40x4096xf32> + %1325 = "tosa.const"() <{value = dense<[1, 0]> : tensor<2xi32>}> : () -> tensor<2xi32> + %1326 = tosa.transpose %arg103, %1325 : (tensor<4096x4096xf32>, tensor<2xi32>) -> tensor<4096x4096xf32> + %1327 = tosa.reshape %1324 {new_shape = array} : (tensor<1x40x4096xf32>) -> tensor<40x4096xf32> + %cst_302 = arith.constant dense<0.000000e+00> : 
tensor<40x4096xf32> + %1328 = linalg.matmul {cast = #linalg.type_fn} ins(%1327, %1326 : tensor<40x4096xf32>, tensor<4096x4096xf32>) outs(%cst_302 : tensor<40x4096xf32>) -> tensor<40x4096xf32> + %1329 = tosa.reshape %1328 {new_shape = array} : (tensor<40x4096xf32>) -> tensor<1x40x4096xf32> + %1330 = "tosa.const"() <{value = dense<[1, 0]> : tensor<2xi32>}> : () -> tensor<2xi32> + %1331 = tosa.transpose %arg104, %1330 : (tensor<4096x4096xf32>, tensor<2xi32>) -> tensor<4096x4096xf32> + %1332 = tosa.reshape %1324 {new_shape = array} : (tensor<1x40x4096xf32>) -> tensor<40x4096xf32> + %cst_303 = arith.constant dense<0.000000e+00> : tensor<40x4096xf32> + %1333 = linalg.matmul {cast = #linalg.type_fn} ins(%1332, %1331 : tensor<40x4096xf32>, tensor<4096x4096xf32>) outs(%cst_303 : tensor<40x4096xf32>) -> tensor<40x4096xf32> + %1334 = tosa.reshape %1333 {new_shape = array} : (tensor<40x4096xf32>) -> tensor<1x40x4096xf32> + %1335 = "tosa.const"() <{value = dense<[1, 0]> : tensor<2xi32>}> : () -> tensor<2xi32> + %1336 = tosa.transpose %arg105, %1335 : (tensor<4096x4096xf32>, tensor<2xi32>) -> tensor<4096x4096xf32> + %1337 = tosa.reshape %1324 {new_shape = array} : (tensor<1x40x4096xf32>) -> tensor<40x4096xf32> + %cst_304 = arith.constant dense<0.000000e+00> : tensor<40x4096xf32> + %1338 = linalg.matmul {cast = #linalg.type_fn} ins(%1337, %1336 : tensor<40x4096xf32>, tensor<4096x4096xf32>) outs(%cst_304 : tensor<40x4096xf32>) -> tensor<40x4096xf32> + %1339 = tosa.reshape %1338 {new_shape = array} : (tensor<40x4096xf32>) -> tensor<1x40x4096xf32> + %1340 = tosa.reshape %1329 {new_shape = array} : (tensor<1x40x4096xf32>) -> tensor<1x40x32x128xf32> + %1341 = "tosa.const"() <{value = dense<[0, 2, 1, 3]> : tensor<4xi32>}> : () -> tensor<4xi32> + %1342 = tosa.transpose %1340, %1341 : (tensor<1x40x32x128xf32>, tensor<4xi32>) -> tensor<1x32x40x128xf32> + %1343 = tosa.reshape %1334 {new_shape = array} : (tensor<1x40x4096xf32>) -> tensor<1x40x32x128xf32> + %1344 = "tosa.const"() <{value = 
dense<[0, 2, 1, 3]> : tensor<4xi32>}> : () -> tensor<4xi32> + %1345 = tosa.transpose %1343, %1344 : (tensor<1x40x32x128xf32>, tensor<4xi32>) -> tensor<1x32x40x128xf32> + %1346 = tosa.reshape %1339 {new_shape = array} : (tensor<1x40x4096xf32>) -> tensor<1x40x32x128xf32> + %1347 = "tosa.const"() <{value = dense<[0, 2, 1, 3]> : tensor<4xi32>}> : () -> tensor<4xi32> + %1348 = tosa.transpose %1346, %1347 : (tensor<1x40x32x128xf32>, tensor<4xi32>) -> tensor<1x32x40x128xf32> + %1349 = tosa.reshape %45 {new_shape = array} : (tensor<1x40x128xf32>) -> tensor<1x1x40x128xf32> + %1350 = tosa.reshape %47 {new_shape = array} : (tensor<1x40x128xf32>) -> tensor<1x1x40x128xf32> + %1351 = tosa.mul %1342, %1349 {shift = 0 : i8} : (tensor<1x32x40x128xf32>, tensor<1x1x40x128xf32>) -> tensor<1x32x40x128xf32> + %extracted_slice_305 = tensor.extract_slice %1342[0, 0, 0, 0] [1, 32, 40, 64] [1, 1, 1, 1] : tensor<1x32x40x128xf32> to tensor<1x32x40x64xf32> + %extracted_slice_306 = tensor.extract_slice %1342[0, 0, 0, 64] [1, 32, 40, 64] [1, 1, 1, 1] : tensor<1x32x40x128xf32> to tensor<1x32x40x64xf32> + %1352 = tensor.empty() : tensor<1x32x40x64xf32> + %1353 = linalg.negf ins(%extracted_slice_306 : tensor<1x32x40x64xf32>) outs(%1352 : tensor<1x32x40x64xf32>) -> tensor<1x32x40x64xf32> + %1354 = tensor.empty() : tensor<1x32x40x128xf32> + %inserted_slice_307 = tensor.insert_slice %1353 into %1354[0, 0, 0, 0] [1, 32, 40, 64] [1, 1, 1, 1] : tensor<1x32x40x64xf32> into tensor<1x32x40x128xf32> + %inserted_slice_308 = tensor.insert_slice %extracted_slice_305 into %inserted_slice_307[0, 0, 0, 64] [1, 32, 40, 64] [1, 1, 1, 1] : tensor<1x32x40x64xf32> into tensor<1x32x40x128xf32> + %1355 = tosa.mul %inserted_slice_308, %1350 {shift = 0 : i8} : (tensor<1x32x40x128xf32>, tensor<1x1x40x128xf32>) -> tensor<1x32x40x128xf32> + %1356 = tosa.add %1351, %1355 : (tensor<1x32x40x128xf32>, tensor<1x32x40x128xf32>) -> tensor<1x32x40x128xf32> + %1357 = tosa.mul %1345, %1349 {shift = 0 : i8} : (tensor<1x32x40x128xf32>, 
tensor<1x1x40x128xf32>) -> tensor<1x32x40x128xf32> + %extracted_slice_309 = tensor.extract_slice %1345[0, 0, 0, 0] [1, 32, 40, 64] [1, 1, 1, 1] : tensor<1x32x40x128xf32> to tensor<1x32x40x64xf32> + %extracted_slice_310 = tensor.extract_slice %1345[0, 0, 0, 64] [1, 32, 40, 64] [1, 1, 1, 1] : tensor<1x32x40x128xf32> to tensor<1x32x40x64xf32> + %1358 = tensor.empty() : tensor<1x32x40x64xf32> + %1359 = linalg.negf ins(%extracted_slice_310 : tensor<1x32x40x64xf32>) outs(%1358 : tensor<1x32x40x64xf32>) -> tensor<1x32x40x64xf32> + %1360 = tensor.empty() : tensor<1x32x40x128xf32> + %inserted_slice_311 = tensor.insert_slice %1359 into %1360[0, 0, 0, 0] [1, 32, 40, 64] [1, 1, 1, 1] : tensor<1x32x40x64xf32> into tensor<1x32x40x128xf32> + %inserted_slice_312 = tensor.insert_slice %extracted_slice_309 into %inserted_slice_311[0, 0, 0, 64] [1, 32, 40, 64] [1, 1, 1, 1] : tensor<1x32x40x64xf32> into tensor<1x32x40x128xf32> + %1361 = tosa.mul %inserted_slice_312, %1350 {shift = 0 : i8} : (tensor<1x32x40x128xf32>, tensor<1x1x40x128xf32>) -> tensor<1x32x40x128xf32> + %1362 = tosa.add %1357, %1361 : (tensor<1x32x40x128xf32>, tensor<1x32x40x128xf32>) -> tensor<1x32x40x128xf32> + %1363 = tosa.reshape %19 {new_shape = array} : (tensor<40x41xf32>) -> tensor<1x40x41xf32> + %1364 = tosa.reshape %1363 {new_shape = array} : (tensor<1x40x41xf32>) -> tensor<1x1x40x41xf32> + %extracted_slice_313 = tensor.extract_slice %1364[0, 0, 0, 0] [1, 1, 40, 41] [1, 1, 1, 1] : tensor<1x1x40x41xf32> to tensor<1x1x40x41xf32> + %extracted_slice_314 = tensor.extract_slice %extracted_slice_313[0, 0, 0, 0] [1, 1, 40, 41] [1, 1, 1, 1] : tensor<1x1x40x41xf32> to tensor<1x1x40x41xf32> + %1365 = "tosa.const"() <{value = dense<0.000000e+00> : tensor<1x1x40x41xf32>}> : () -> tensor<1x1x40x41xf32> + %1366 = tosa.add %extracted_slice_314, %1365 : (tensor<1x1x40x41xf32>, tensor<1x1x40x41xf32>) -> tensor<1x1x40x41xf32> + %extracted_slice_315 = tensor.extract_slice %1366[0, 0, 0, 0] [1, 1, 40, 41] [1, 1, 1, 1] : 
tensor<1x1x40x41xf32> to tensor<1x1x40x41xf32> + %extracted_slice_316 = tensor.extract_slice %extracted_slice_315[0, 0, 0, 0] [1, 1, 40, 41] [1, 1, 1, 1] : tensor<1x1x40x41xf32> to tensor<1x1x40x41xf32> + %extracted_slice_317 = tensor.extract_slice %extracted_slice_316[0, 0, 0, 0] [1, 1, 40, 41] [1, 1, 1, 1] : tensor<1x1x40x41xf32> to tensor<1x1x40x41xf32> + %extracted_slice_318 = tensor.extract_slice %extracted_slice_317[0, 0, 0, 0] [1, 1, 40, 40] [1, 1, 1, 1] : tensor<1x1x40x41xf32> to tensor<1x1x40x40xf32> + %cst_319 = arith.constant 0.000000e+00 : f32 + %splat_320 = tensor.splat %cst_319 : tensor<40x40xf32> + %1367 = tosa.reshape %extracted_slice_318 {new_shape = array} : (tensor<1x1x40x40xf32>) -> tensor<40x40xf32> + %1368 = tosa.add %splat_320, %1367 : (tensor<40x40xf32>, tensor<40x40xf32>) -> tensor<40x40xf32> + %1369 = "tosa.const"() <{value = dense<[0, 1, 3, 2]> : tensor<4xi32>}> : () -> tensor<4xi32> + %1370 = tosa.transpose %1362, %1369 : (tensor<1x32x40x128xf32>, tensor<4xi32>) -> tensor<1x32x128x40xf32> + %1371 = tosa.reshape %1356 {new_shape = array} : (tensor<1x32x40x128xf32>) -> tensor<32x40x128xf32> + %1372 = tosa.reshape %1370 {new_shape = array} : (tensor<1x32x128x40xf32>) -> tensor<32x128x40xf32> + %1373 = tosa.matmul %1371, %1372 : (tensor<32x40x128xf32>, tensor<32x128x40xf32>) -> tensor<32x40x40xf32> + %cst_321 = arith.constant 0.0883883461 : f32 + %splat_322 = tensor.splat %cst_321 : tensor<32x40x40xf32> + %1374 = tosa.mul %1373, %splat_322 {shift = 0 : i8} : (tensor<32x40x40xf32>, tensor<32x40x40xf32>) -> tensor<32x40x40xf32> + %1375 = tosa.add %1374, %1368 : (tensor<32x40x40xf32>, tensor<40x40xf32>) -> tensor<32x40x40xf32> + %1376 = tosa.reduce_max %1375 {axis = 2 : i32} : (tensor<32x40x40xf32>) -> tensor<32x40x1xf32> + %1377 = tosa.sub %1375, %1376 : (tensor<32x40x40xf32>, tensor<32x40x1xf32>) -> tensor<32x40x40xf32> + %1378 = math.exp %1377 : tensor<32x40x40xf32> + %1379 = tosa.reduce_sum %1378 {axis = 2 : i32} : (tensor<32x40x40xf32>) -> 
tensor<32x40x1xf32> + %1380 = tosa.log %1379 : (tensor<32x40x1xf32>) -> tensor<32x40x1xf32> + %1381 = tosa.add %1376, %1380 : (tensor<32x40x1xf32>, tensor<32x40x1xf32>) -> tensor<32x40x1xf32> + %1382 = tosa.sub %1375, %1381 : (tensor<32x40x40xf32>, tensor<32x40x1xf32>) -> tensor<32x40x40xf32> + %1383 = math.exp %1382 : tensor<32x40x40xf32> + %1384 = tosa.reshape %1381 {new_shape = array} : (tensor<32x40x1xf32>) -> tensor<1x32x40xf32> + %1385 = tosa.reshape %1348 {new_shape = array} : (tensor<1x32x40x128xf32>) -> tensor<32x40x128xf32> + %1386 = tosa.matmul %1383, %1385 : (tensor<32x40x40xf32>, tensor<32x40x128xf32>) -> tensor<32x40x128xf32> + %1387 = tosa.reshape %1386 {new_shape = array} : (tensor<32x40x128xf32>) -> tensor<1x32x40x128xf32> + %1388 = "tosa.const"() <{value = dense<[0, 2, 1, 3]> : tensor<4xi32>}> : () -> tensor<4xi32> + %1389 = tosa.transpose %1387, %1388 : (tensor<1x32x40x128xf32>, tensor<4xi32>) -> tensor<1x40x32x128xf32> + %1390 = tosa.reshape %1389 {new_shape = array} : (tensor<1x40x32x128xf32>) -> tensor<1x40x4096xf32> + %1391 = "tosa.const"() <{value = dense<[1, 0]> : tensor<2xi32>}> : () -> tensor<2xi32> + %1392 = tosa.transpose %arg106, %1391 : (tensor<4096x4096xf32>, tensor<2xi32>) -> tensor<4096x4096xf32> + %1393 = tosa.reshape %1390 {new_shape = array} : (tensor<1x40x4096xf32>) -> tensor<40x4096xf32> + %cst_323 = arith.constant dense<0.000000e+00> : tensor<40x4096xf32> + %1394 = linalg.matmul {cast = #linalg.type_fn} ins(%1393, %1392 : tensor<40x4096xf32>, tensor<4096x4096xf32>) outs(%cst_323 : tensor<40x4096xf32>) -> tensor<40x4096xf32> + %1395 = tosa.reshape %1394 {new_shape = array} : (tensor<40x4096xf32>) -> tensor<1x40x4096xf32> + %1396 = tosa.add %1312, %1395 : (tensor<1x40x4096xf32>, tensor<1x40x4096xf32>) -> tensor<1x40x4096xf32> + %1397 = tensor.empty() : tensor<1x40x4096xf32> + %c2_i32_324 = arith.constant 2 : i32 + %1398 = linalg.generic {indexing_maps = [#map4, #map4], iterator_types = ["parallel", "parallel", "parallel"]} 
ins(%1396 : tensor<1x40x4096xf32>) outs(%1397 : tensor<1x40x4096xf32>) { + ^bb0(%in: f32, %out: f32): + %3745 = math.fpowi %in, %c2_i32_324 : f32, i32 + linalg.yield %3745 : f32 + } -> tensor<1x40x4096xf32> + %1399 = tosa.reduce_sum %1398 {axis = 2 : i32} : (tensor<1x40x4096xf32>) -> tensor<1x40x1xf32> + %1400 = "tosa.const"() <{value = dense<4.096000e+03> : tensor<1xf32>}> : () -> tensor<1xf32> + %1401 = tosa.reciprocal %1400 : (tensor<1xf32>) -> tensor<1xf32> + %1402 = tosa.mul %1401, %1399 {shift = 0 : i8} : (tensor<1xf32>, tensor<1x40x1xf32>) -> tensor<1x40x1xf32> + %1403 = "tosa.const"() <{value = dense<9.99999974E-6> : tensor<1x40x1xf32>}> : () -> tensor<1x40x1xf32> + %1404 = tosa.add %1402, %1403 : (tensor<1x40x1xf32>, tensor<1x40x1xf32>) -> tensor<1x40x1xf32> + %1405 = tosa.rsqrt %1404 : (tensor<1x40x1xf32>) -> tensor<1x40x1xf32> + %1406 = tosa.mul %1396, %1405 {shift = 0 : i8} : (tensor<1x40x4096xf32>, tensor<1x40x1xf32>) -> tensor<1x40x4096xf32> + %1407 = tosa.reshape %arg107 {new_shape = array} : (tensor<4096xf32>) -> tensor<1x1x4096xf32> + %1408 = tosa.mul %1407, %1406 {shift = 0 : i8} : (tensor<1x1x4096xf32>, tensor<1x40x4096xf32>) -> tensor<1x40x4096xf32> + %1409 = "tosa.const"() <{value = dense<[1, 0]> : tensor<2xi32>}> : () -> tensor<2xi32> + %1410 = tosa.transpose %arg108, %1409 : (tensor<11008x4096xf32>, tensor<2xi32>) -> tensor<4096x11008xf32> + %1411 = tosa.reshape %1408 {new_shape = array} : (tensor<1x40x4096xf32>) -> tensor<40x4096xf32> + %cst_325 = arith.constant dense<0.000000e+00> : tensor<40x11008xf32> + %1412 = linalg.matmul {cast = #linalg.type_fn} ins(%1411, %1410 : tensor<40x4096xf32>, tensor<4096x11008xf32>) outs(%cst_325 : tensor<40x11008xf32>) -> tensor<40x11008xf32> + %1413 = tosa.reshape %1412 {new_shape = array} : (tensor<40x11008xf32>) -> tensor<1x40x11008xf32> + %1414 = tosa.sigmoid %1413 : (tensor<1x40x11008xf32>) -> tensor<1x40x11008xf32> + %1415 = tosa.mul %1413, %1414 {shift = 0 : i8} : (tensor<1x40x11008xf32>, 
tensor<1x40x11008xf32>) -> tensor<1x40x11008xf32> + %1416 = "tosa.const"() <{value = dense<[1, 0]> : tensor<2xi32>}> : () -> tensor<2xi32> + %1417 = tosa.transpose %arg109, %1416 : (tensor<11008x4096xf32>, tensor<2xi32>) -> tensor<4096x11008xf32> + %1418 = tosa.reshape %1408 {new_shape = array} : (tensor<1x40x4096xf32>) -> tensor<40x4096xf32> + %cst_326 = arith.constant dense<0.000000e+00> : tensor<40x11008xf32> + %1419 = linalg.matmul {cast = #linalg.type_fn} ins(%1418, %1417 : tensor<40x4096xf32>, tensor<4096x11008xf32>) outs(%cst_326 : tensor<40x11008xf32>) -> tensor<40x11008xf32> + %1420 = tosa.reshape %1419 {new_shape = array} : (tensor<40x11008xf32>) -> tensor<1x40x11008xf32> + %1421 = tosa.mul %1415, %1420 {shift = 0 : i8} : (tensor<1x40x11008xf32>, tensor<1x40x11008xf32>) -> tensor<1x40x11008xf32> + %1422 = "tosa.const"() <{value = dense<[1, 0]> : tensor<2xi32>}> : () -> tensor<2xi32> + %1423 = tosa.transpose %arg110, %1422 : (tensor<4096x11008xf32>, tensor<2xi32>) -> tensor<11008x4096xf32> + %1424 = tosa.reshape %1421 {new_shape = array} : (tensor<1x40x11008xf32>) -> tensor<40x11008xf32> + %cst_327 = arith.constant dense<0.000000e+00> : tensor<40x4096xf32> + %1425 = linalg.matmul {cast = #linalg.type_fn} ins(%1424, %1423 : tensor<40x11008xf32>, tensor<11008x4096xf32>) outs(%cst_327 : tensor<40x4096xf32>) -> tensor<40x4096xf32> + %1426 = tosa.reshape %1425 {new_shape = array} : (tensor<40x4096xf32>) -> tensor<1x40x4096xf32> + %1427 = tosa.add %1396, %1426 : (tensor<1x40x4096xf32>, tensor<1x40x4096xf32>) -> tensor<1x40x4096xf32> + %1428 = tensor.empty() : tensor<1x40x4096xf32> + %c2_i32_328 = arith.constant 2 : i32 + %1429 = linalg.generic {indexing_maps = [#map4, #map4], iterator_types = ["parallel", "parallel", "parallel"]} ins(%1427 : tensor<1x40x4096xf32>) outs(%1428 : tensor<1x40x4096xf32>) { + ^bb0(%in: f32, %out: f32): + %3745 = math.fpowi %in, %c2_i32_328 : f32, i32 + linalg.yield %3745 : f32 + } -> tensor<1x40x4096xf32> + %1430 = tosa.reduce_sum 
%1429 {axis = 2 : i32} : (tensor<1x40x4096xf32>) -> tensor<1x40x1xf32> + %1431 = "tosa.const"() <{value = dense<4.096000e+03> : tensor<1xf32>}> : () -> tensor<1xf32> + %1432 = tosa.reciprocal %1431 : (tensor<1xf32>) -> tensor<1xf32> + %1433 = tosa.mul %1432, %1430 {shift = 0 : i8} : (tensor<1xf32>, tensor<1x40x1xf32>) -> tensor<1x40x1xf32> + %1434 = "tosa.const"() <{value = dense<9.99999974E-6> : tensor<1x40x1xf32>}> : () -> tensor<1x40x1xf32> + %1435 = tosa.add %1433, %1434 : (tensor<1x40x1xf32>, tensor<1x40x1xf32>) -> tensor<1x40x1xf32> + %1436 = tosa.rsqrt %1435 : (tensor<1x40x1xf32>) -> tensor<1x40x1xf32> + %1437 = tosa.mul %1427, %1436 {shift = 0 : i8} : (tensor<1x40x4096xf32>, tensor<1x40x1xf32>) -> tensor<1x40x4096xf32> + %1438 = tosa.reshape %arg111 {new_shape = array} : (tensor<4096xf32>) -> tensor<1x1x4096xf32> + %1439 = tosa.mul %1438, %1437 {shift = 0 : i8} : (tensor<1x1x4096xf32>, tensor<1x40x4096xf32>) -> tensor<1x40x4096xf32> + %1440 = "tosa.const"() <{value = dense<[1, 0]> : tensor<2xi32>}> : () -> tensor<2xi32> + %1441 = tosa.transpose %arg112, %1440 : (tensor<4096x4096xf32>, tensor<2xi32>) -> tensor<4096x4096xf32> + %1442 = tosa.reshape %1439 {new_shape = array} : (tensor<1x40x4096xf32>) -> tensor<40x4096xf32> + %cst_329 = arith.constant dense<0.000000e+00> : tensor<40x4096xf32> + %1443 = linalg.matmul {cast = #linalg.type_fn} ins(%1442, %1441 : tensor<40x4096xf32>, tensor<4096x4096xf32>) outs(%cst_329 : tensor<40x4096xf32>) -> tensor<40x4096xf32> + %1444 = tosa.reshape %1443 {new_shape = array} : (tensor<40x4096xf32>) -> tensor<1x40x4096xf32> + %1445 = "tosa.const"() <{value = dense<[1, 0]> : tensor<2xi32>}> : () -> tensor<2xi32> + %1446 = tosa.transpose %arg113, %1445 : (tensor<4096x4096xf32>, tensor<2xi32>) -> tensor<4096x4096xf32> + %1447 = tosa.reshape %1439 {new_shape = array} : (tensor<1x40x4096xf32>) -> tensor<40x4096xf32> + %cst_330 = arith.constant dense<0.000000e+00> : tensor<40x4096xf32> + %1448 = linalg.matmul {cast = #linalg.type_fn} 
ins(%1447, %1446 : tensor<40x4096xf32>, tensor<4096x4096xf32>) outs(%cst_330 : tensor<40x4096xf32>) -> tensor<40x4096xf32> + %1449 = tosa.reshape %1448 {new_shape = array} : (tensor<40x4096xf32>) -> tensor<1x40x4096xf32> + %1450 = "tosa.const"() <{value = dense<[1, 0]> : tensor<2xi32>}> : () -> tensor<2xi32> + %1451 = tosa.transpose %arg114, %1450 : (tensor<4096x4096xf32>, tensor<2xi32>) -> tensor<4096x4096xf32> + %1452 = tosa.reshape %1439 {new_shape = array} : (tensor<1x40x4096xf32>) -> tensor<40x4096xf32> + %cst_331 = arith.constant dense<0.000000e+00> : tensor<40x4096xf32> + %1453 = linalg.matmul {cast = #linalg.type_fn} ins(%1452, %1451 : tensor<40x4096xf32>, tensor<4096x4096xf32>) outs(%cst_331 : tensor<40x4096xf32>) -> tensor<40x4096xf32> + %1454 = tosa.reshape %1453 {new_shape = array} : (tensor<40x4096xf32>) -> tensor<1x40x4096xf32> + %1455 = tosa.reshape %1444 {new_shape = array} : (tensor<1x40x4096xf32>) -> tensor<1x40x32x128xf32> + %1456 = "tosa.const"() <{value = dense<[0, 2, 1, 3]> : tensor<4xi32>}> : () -> tensor<4xi32> + %1457 = tosa.transpose %1455, %1456 : (tensor<1x40x32x128xf32>, tensor<4xi32>) -> tensor<1x32x40x128xf32> + %1458 = tosa.reshape %1449 {new_shape = array} : (tensor<1x40x4096xf32>) -> tensor<1x40x32x128xf32> + %1459 = "tosa.const"() <{value = dense<[0, 2, 1, 3]> : tensor<4xi32>}> : () -> tensor<4xi32> + %1460 = tosa.transpose %1458, %1459 : (tensor<1x40x32x128xf32>, tensor<4xi32>) -> tensor<1x32x40x128xf32> + %1461 = tosa.reshape %1454 {new_shape = array} : (tensor<1x40x4096xf32>) -> tensor<1x40x32x128xf32> + %1462 = "tosa.const"() <{value = dense<[0, 2, 1, 3]> : tensor<4xi32>}> : () -> tensor<4xi32> + %1463 = tosa.transpose %1461, %1462 : (tensor<1x40x32x128xf32>, tensor<4xi32>) -> tensor<1x32x40x128xf32> + %1464 = tosa.reshape %45 {new_shape = array} : (tensor<1x40x128xf32>) -> tensor<1x1x40x128xf32> + %1465 = tosa.reshape %47 {new_shape = array} : (tensor<1x40x128xf32>) -> tensor<1x1x40x128xf32> + %1466 = tosa.mul %1457, %1464 
{shift = 0 : i8} : (tensor<1x32x40x128xf32>, tensor<1x1x40x128xf32>) -> tensor<1x32x40x128xf32> + %extracted_slice_332 = tensor.extract_slice %1457[0, 0, 0, 0] [1, 32, 40, 64] [1, 1, 1, 1] : tensor<1x32x40x128xf32> to tensor<1x32x40x64xf32> + %extracted_slice_333 = tensor.extract_slice %1457[0, 0, 0, 64] [1, 32, 40, 64] [1, 1, 1, 1] : tensor<1x32x40x128xf32> to tensor<1x32x40x64xf32> + %1467 = tensor.empty() : tensor<1x32x40x64xf32> + %1468 = linalg.negf ins(%extracted_slice_333 : tensor<1x32x40x64xf32>) outs(%1467 : tensor<1x32x40x64xf32>) -> tensor<1x32x40x64xf32> + %1469 = tensor.empty() : tensor<1x32x40x128xf32> + %inserted_slice_334 = tensor.insert_slice %1468 into %1469[0, 0, 0, 0] [1, 32, 40, 64] [1, 1, 1, 1] : tensor<1x32x40x64xf32> into tensor<1x32x40x128xf32> + %inserted_slice_335 = tensor.insert_slice %extracted_slice_332 into %inserted_slice_334[0, 0, 0, 64] [1, 32, 40, 64] [1, 1, 1, 1] : tensor<1x32x40x64xf32> into tensor<1x32x40x128xf32> + %1470 = tosa.mul %inserted_slice_335, %1465 {shift = 0 : i8} : (tensor<1x32x40x128xf32>, tensor<1x1x40x128xf32>) -> tensor<1x32x40x128xf32> + %1471 = tosa.add %1466, %1470 : (tensor<1x32x40x128xf32>, tensor<1x32x40x128xf32>) -> tensor<1x32x40x128xf32> + %1472 = tosa.mul %1460, %1464 {shift = 0 : i8} : (tensor<1x32x40x128xf32>, tensor<1x1x40x128xf32>) -> tensor<1x32x40x128xf32> + %extracted_slice_336 = tensor.extract_slice %1460[0, 0, 0, 0] [1, 32, 40, 64] [1, 1, 1, 1] : tensor<1x32x40x128xf32> to tensor<1x32x40x64xf32> + %extracted_slice_337 = tensor.extract_slice %1460[0, 0, 0, 64] [1, 32, 40, 64] [1, 1, 1, 1] : tensor<1x32x40x128xf32> to tensor<1x32x40x64xf32> + %1473 = tensor.empty() : tensor<1x32x40x64xf32> + %1474 = linalg.negf ins(%extracted_slice_337 : tensor<1x32x40x64xf32>) outs(%1473 : tensor<1x32x40x64xf32>) -> tensor<1x32x40x64xf32> + %1475 = tensor.empty() : tensor<1x32x40x128xf32> + %inserted_slice_338 = tensor.insert_slice %1474 into %1475[0, 0, 0, 0] [1, 32, 40, 64] [1, 1, 1, 1] : 
tensor<1x32x40x64xf32> into tensor<1x32x40x128xf32> + %inserted_slice_339 = tensor.insert_slice %extracted_slice_336 into %inserted_slice_338[0, 0, 0, 64] [1, 32, 40, 64] [1, 1, 1, 1] : tensor<1x32x40x64xf32> into tensor<1x32x40x128xf32> + %1476 = tosa.mul %inserted_slice_339, %1465 {shift = 0 : i8} : (tensor<1x32x40x128xf32>, tensor<1x1x40x128xf32>) -> tensor<1x32x40x128xf32> + %1477 = tosa.add %1472, %1476 : (tensor<1x32x40x128xf32>, tensor<1x32x40x128xf32>) -> tensor<1x32x40x128xf32> + %1478 = tosa.reshape %19 {new_shape = array} : (tensor<40x41xf32>) -> tensor<1x40x41xf32> + %1479 = tosa.reshape %1478 {new_shape = array} : (tensor<1x40x41xf32>) -> tensor<1x1x40x41xf32> + %extracted_slice_340 = tensor.extract_slice %1479[0, 0, 0, 0] [1, 1, 40, 41] [1, 1, 1, 1] : tensor<1x1x40x41xf32> to tensor<1x1x40x41xf32> + %extracted_slice_341 = tensor.extract_slice %extracted_slice_340[0, 0, 0, 0] [1, 1, 40, 41] [1, 1, 1, 1] : tensor<1x1x40x41xf32> to tensor<1x1x40x41xf32> + %1480 = "tosa.const"() <{value = dense<0.000000e+00> : tensor<1x1x40x41xf32>}> : () -> tensor<1x1x40x41xf32> + %1481 = tosa.add %extracted_slice_341, %1480 : (tensor<1x1x40x41xf32>, tensor<1x1x40x41xf32>) -> tensor<1x1x40x41xf32> + %extracted_slice_342 = tensor.extract_slice %1481[0, 0, 0, 0] [1, 1, 40, 41] [1, 1, 1, 1] : tensor<1x1x40x41xf32> to tensor<1x1x40x41xf32> + %extracted_slice_343 = tensor.extract_slice %extracted_slice_342[0, 0, 0, 0] [1, 1, 40, 41] [1, 1, 1, 1] : tensor<1x1x40x41xf32> to tensor<1x1x40x41xf32> + %extracted_slice_344 = tensor.extract_slice %extracted_slice_343[0, 0, 0, 0] [1, 1, 40, 41] [1, 1, 1, 1] : tensor<1x1x40x41xf32> to tensor<1x1x40x41xf32> + %extracted_slice_345 = tensor.extract_slice %extracted_slice_344[0, 0, 0, 0] [1, 1, 40, 40] [1, 1, 1, 1] : tensor<1x1x40x41xf32> to tensor<1x1x40x40xf32> + %cst_346 = arith.constant 0.000000e+00 : f32 + %splat_347 = tensor.splat %cst_346 : tensor<40x40xf32> + %1482 = tosa.reshape %extracted_slice_345 {new_shape = array} : 
(tensor<1x1x40x40xf32>) -> tensor<40x40xf32> + %1483 = tosa.add %splat_347, %1482 : (tensor<40x40xf32>, tensor<40x40xf32>) -> tensor<40x40xf32> + %1484 = "tosa.const"() <{value = dense<[0, 1, 3, 2]> : tensor<4xi32>}> : () -> tensor<4xi32> + %1485 = tosa.transpose %1477, %1484 : (tensor<1x32x40x128xf32>, tensor<4xi32>) -> tensor<1x32x128x40xf32> + %1486 = tosa.reshape %1471 {new_shape = array} : (tensor<1x32x40x128xf32>) -> tensor<32x40x128xf32> + %1487 = tosa.reshape %1485 {new_shape = array} : (tensor<1x32x128x40xf32>) -> tensor<32x128x40xf32> + %1488 = tosa.matmul %1486, %1487 : (tensor<32x40x128xf32>, tensor<32x128x40xf32>) -> tensor<32x40x40xf32> + %cst_348 = arith.constant 0.0883883461 : f32 + %splat_349 = tensor.splat %cst_348 : tensor<32x40x40xf32> + %1489 = tosa.mul %1488, %splat_349 {shift = 0 : i8} : (tensor<32x40x40xf32>, tensor<32x40x40xf32>) -> tensor<32x40x40xf32> + %1490 = tosa.add %1489, %1483 : (tensor<32x40x40xf32>, tensor<40x40xf32>) -> tensor<32x40x40xf32> + %1491 = tosa.reduce_max %1490 {axis = 2 : i32} : (tensor<32x40x40xf32>) -> tensor<32x40x1xf32> + %1492 = tosa.sub %1490, %1491 : (tensor<32x40x40xf32>, tensor<32x40x1xf32>) -> tensor<32x40x40xf32> + %1493 = math.exp %1492 : tensor<32x40x40xf32> + %1494 = tosa.reduce_sum %1493 {axis = 2 : i32} : (tensor<32x40x40xf32>) -> tensor<32x40x1xf32> + %1495 = tosa.log %1494 : (tensor<32x40x1xf32>) -> tensor<32x40x1xf32> + %1496 = tosa.add %1491, %1495 : (tensor<32x40x1xf32>, tensor<32x40x1xf32>) -> tensor<32x40x1xf32> + %1497 = tosa.sub %1490, %1496 : (tensor<32x40x40xf32>, tensor<32x40x1xf32>) -> tensor<32x40x40xf32> + %1498 = math.exp %1497 : tensor<32x40x40xf32> + %1499 = tosa.reshape %1496 {new_shape = array} : (tensor<32x40x1xf32>) -> tensor<1x32x40xf32> + %1500 = tosa.reshape %1463 {new_shape = array} : (tensor<1x32x40x128xf32>) -> tensor<32x40x128xf32> + %1501 = tosa.matmul %1498, %1500 : (tensor<32x40x40xf32>, tensor<32x40x128xf32>) -> tensor<32x40x128xf32> + %1502 = tosa.reshape %1501 
{new_shape = array} : (tensor<32x40x128xf32>) -> tensor<1x32x40x128xf32> + %1503 = "tosa.const"() <{value = dense<[0, 2, 1, 3]> : tensor<4xi32>}> : () -> tensor<4xi32> + %1504 = tosa.transpose %1502, %1503 : (tensor<1x32x40x128xf32>, tensor<4xi32>) -> tensor<1x40x32x128xf32> + %1505 = tosa.reshape %1504 {new_shape = array} : (tensor<1x40x32x128xf32>) -> tensor<1x40x4096xf32> + %1506 = "tosa.const"() <{value = dense<[1, 0]> : tensor<2xi32>}> : () -> tensor<2xi32> + %1507 = tosa.transpose %arg115, %1506 : (tensor<4096x4096xf32>, tensor<2xi32>) -> tensor<4096x4096xf32> + %1508 = tosa.reshape %1505 {new_shape = array} : (tensor<1x40x4096xf32>) -> tensor<40x4096xf32> + %cst_350 = arith.constant dense<0.000000e+00> : tensor<40x4096xf32> + %1509 = linalg.matmul {cast = #linalg.type_fn} ins(%1508, %1507 : tensor<40x4096xf32>, tensor<4096x4096xf32>) outs(%cst_350 : tensor<40x4096xf32>) -> tensor<40x4096xf32> + %1510 = tosa.reshape %1509 {new_shape = array} : (tensor<40x4096xf32>) -> tensor<1x40x4096xf32> + %1511 = tosa.add %1427, %1510 : (tensor<1x40x4096xf32>, tensor<1x40x4096xf32>) -> tensor<1x40x4096xf32> + %1512 = tensor.empty() : tensor<1x40x4096xf32> + %c2_i32_351 = arith.constant 2 : i32 + %1513 = linalg.generic {indexing_maps = [#map4, #map4], iterator_types = ["parallel", "parallel", "parallel"]} ins(%1511 : tensor<1x40x4096xf32>) outs(%1512 : tensor<1x40x4096xf32>) { + ^bb0(%in: f32, %out: f32): + %3745 = math.fpowi %in, %c2_i32_351 : f32, i32 + linalg.yield %3745 : f32 + } -> tensor<1x40x4096xf32> + %1514 = tosa.reduce_sum %1513 {axis = 2 : i32} : (tensor<1x40x4096xf32>) -> tensor<1x40x1xf32> + %1515 = "tosa.const"() <{value = dense<4.096000e+03> : tensor<1xf32>}> : () -> tensor<1xf32> + %1516 = tosa.reciprocal %1515 : (tensor<1xf32>) -> tensor<1xf32> + %1517 = tosa.mul %1516, %1514 {shift = 0 : i8} : (tensor<1xf32>, tensor<1x40x1xf32>) -> tensor<1x40x1xf32> + %1518 = "tosa.const"() <{value = dense<9.99999974E-6> : tensor<1x40x1xf32>}> : () -> tensor<1x40x1xf32> 
+ %1519 = tosa.add %1517, %1518 : (tensor<1x40x1xf32>, tensor<1x40x1xf32>) -> tensor<1x40x1xf32> + %1520 = tosa.rsqrt %1519 : (tensor<1x40x1xf32>) -> tensor<1x40x1xf32> + %1521 = tosa.mul %1511, %1520 {shift = 0 : i8} : (tensor<1x40x4096xf32>, tensor<1x40x1xf32>) -> tensor<1x40x4096xf32> + %1522 = tosa.reshape %arg116 {new_shape = array} : (tensor<4096xf32>) -> tensor<1x1x4096xf32> + %1523 = tosa.mul %1522, %1521 {shift = 0 : i8} : (tensor<1x1x4096xf32>, tensor<1x40x4096xf32>) -> tensor<1x40x4096xf32> + %1524 = "tosa.const"() <{value = dense<[1, 0]> : tensor<2xi32>}> : () -> tensor<2xi32> + %1525 = tosa.transpose %arg117, %1524 : (tensor<11008x4096xf32>, tensor<2xi32>) -> tensor<4096x11008xf32> + %1526 = tosa.reshape %1523 {new_shape = array} : (tensor<1x40x4096xf32>) -> tensor<40x4096xf32> + %cst_352 = arith.constant dense<0.000000e+00> : tensor<40x11008xf32> + %1527 = linalg.matmul {cast = #linalg.type_fn} ins(%1526, %1525 : tensor<40x4096xf32>, tensor<4096x11008xf32>) outs(%cst_352 : tensor<40x11008xf32>) -> tensor<40x11008xf32> + %1528 = tosa.reshape %1527 {new_shape = array} : (tensor<40x11008xf32>) -> tensor<1x40x11008xf32> + %1529 = tosa.sigmoid %1528 : (tensor<1x40x11008xf32>) -> tensor<1x40x11008xf32> + %1530 = tosa.mul %1528, %1529 {shift = 0 : i8} : (tensor<1x40x11008xf32>, tensor<1x40x11008xf32>) -> tensor<1x40x11008xf32> + %1531 = "tosa.const"() <{value = dense<[1, 0]> : tensor<2xi32>}> : () -> tensor<2xi32> + %1532 = tosa.transpose %arg118, %1531 : (tensor<11008x4096xf32>, tensor<2xi32>) -> tensor<4096x11008xf32> + %1533 = tosa.reshape %1523 {new_shape = array} : (tensor<1x40x4096xf32>) -> tensor<40x4096xf32> + %cst_353 = arith.constant dense<0.000000e+00> : tensor<40x11008xf32> + %1534 = linalg.matmul {cast = #linalg.type_fn} ins(%1533, %1532 : tensor<40x4096xf32>, tensor<4096x11008xf32>) outs(%cst_353 : tensor<40x11008xf32>) -> tensor<40x11008xf32> + %1535 = tosa.reshape %1534 {new_shape = array} : (tensor<40x11008xf32>) -> tensor<1x40x11008xf32> + 
%1536 = tosa.mul %1530, %1535 {shift = 0 : i8} : (tensor<1x40x11008xf32>, tensor<1x40x11008xf32>) -> tensor<1x40x11008xf32> + %1537 = "tosa.const"() <{value = dense<[1, 0]> : tensor<2xi32>}> : () -> tensor<2xi32> + %1538 = tosa.transpose %arg119, %1537 : (tensor<4096x11008xf32>, tensor<2xi32>) -> tensor<11008x4096xf32> + %1539 = tosa.reshape %1536 {new_shape = array} : (tensor<1x40x11008xf32>) -> tensor<40x11008xf32> + %cst_354 = arith.constant dense<0.000000e+00> : tensor<40x4096xf32> + %1540 = linalg.matmul {cast = #linalg.type_fn} ins(%1539, %1538 : tensor<40x11008xf32>, tensor<11008x4096xf32>) outs(%cst_354 : tensor<40x4096xf32>) -> tensor<40x4096xf32> + %1541 = tosa.reshape %1540 {new_shape = array} : (tensor<40x4096xf32>) -> tensor<1x40x4096xf32> + %1542 = tosa.add %1511, %1541 : (tensor<1x40x4096xf32>, tensor<1x40x4096xf32>) -> tensor<1x40x4096xf32> + %1543 = tensor.empty() : tensor<1x40x4096xf32> + %c2_i32_355 = arith.constant 2 : i32 + %1544 = linalg.generic {indexing_maps = [#map4, #map4], iterator_types = ["parallel", "parallel", "parallel"]} ins(%1542 : tensor<1x40x4096xf32>) outs(%1543 : tensor<1x40x4096xf32>) { + ^bb0(%in: f32, %out: f32): + %3745 = math.fpowi %in, %c2_i32_355 : f32, i32 + linalg.yield %3745 : f32 + } -> tensor<1x40x4096xf32> + %1545 = tosa.reduce_sum %1544 {axis = 2 : i32} : (tensor<1x40x4096xf32>) -> tensor<1x40x1xf32> + %1546 = "tosa.const"() <{value = dense<4.096000e+03> : tensor<1xf32>}> : () -> tensor<1xf32> + %1547 = tosa.reciprocal %1546 : (tensor<1xf32>) -> tensor<1xf32> + %1548 = tosa.mul %1547, %1545 {shift = 0 : i8} : (tensor<1xf32>, tensor<1x40x1xf32>) -> tensor<1x40x1xf32> + %1549 = "tosa.const"() <{value = dense<9.99999974E-6> : tensor<1x40x1xf32>}> : () -> tensor<1x40x1xf32> + %1550 = tosa.add %1548, %1549 : (tensor<1x40x1xf32>, tensor<1x40x1xf32>) -> tensor<1x40x1xf32> + %1551 = tosa.rsqrt %1550 : (tensor<1x40x1xf32>) -> tensor<1x40x1xf32> + %1552 = tosa.mul %1542, %1551 {shift = 0 : i8} : (tensor<1x40x4096xf32>, 
tensor<1x40x1xf32>) -> tensor<1x40x4096xf32> + %1553 = tosa.reshape %arg120 {new_shape = array} : (tensor<4096xf32>) -> tensor<1x1x4096xf32> + %1554 = tosa.mul %1553, %1552 {shift = 0 : i8} : (tensor<1x1x4096xf32>, tensor<1x40x4096xf32>) -> tensor<1x40x4096xf32> + %1555 = "tosa.const"() <{value = dense<[1, 0]> : tensor<2xi32>}> : () -> tensor<2xi32> + %1556 = tosa.transpose %arg121, %1555 : (tensor<4096x4096xf32>, tensor<2xi32>) -> tensor<4096x4096xf32> + %1557 = tosa.reshape %1554 {new_shape = array} : (tensor<1x40x4096xf32>) -> tensor<40x4096xf32> + %cst_356 = arith.constant dense<0.000000e+00> : tensor<40x4096xf32> + %1558 = linalg.matmul {cast = #linalg.type_fn} ins(%1557, %1556 : tensor<40x4096xf32>, tensor<4096x4096xf32>) outs(%cst_356 : tensor<40x4096xf32>) -> tensor<40x4096xf32> + %1559 = tosa.reshape %1558 {new_shape = array} : (tensor<40x4096xf32>) -> tensor<1x40x4096xf32> + %1560 = "tosa.const"() <{value = dense<[1, 0]> : tensor<2xi32>}> : () -> tensor<2xi32> + %1561 = tosa.transpose %arg122, %1560 : (tensor<4096x4096xf32>, tensor<2xi32>) -> tensor<4096x4096xf32> + %1562 = tosa.reshape %1554 {new_shape = array} : (tensor<1x40x4096xf32>) -> tensor<40x4096xf32> + %cst_357 = arith.constant dense<0.000000e+00> : tensor<40x4096xf32> + %1563 = linalg.matmul {cast = #linalg.type_fn} ins(%1562, %1561 : tensor<40x4096xf32>, tensor<4096x4096xf32>) outs(%cst_357 : tensor<40x4096xf32>) -> tensor<40x4096xf32> + %1564 = tosa.reshape %1563 {new_shape = array} : (tensor<40x4096xf32>) -> tensor<1x40x4096xf32> + %1565 = "tosa.const"() <{value = dense<[1, 0]> : tensor<2xi32>}> : () -> tensor<2xi32> + %1566 = tosa.transpose %arg123, %1565 : (tensor<4096x4096xf32>, tensor<2xi32>) -> tensor<4096x4096xf32> + %1567 = tosa.reshape %1554 {new_shape = array} : (tensor<1x40x4096xf32>) -> tensor<40x4096xf32> + %cst_358 = arith.constant dense<0.000000e+00> : tensor<40x4096xf32> + %1568 = linalg.matmul {cast = #linalg.type_fn} ins(%1567, %1566 : tensor<40x4096xf32>, 
tensor<4096x4096xf32>) outs(%cst_358 : tensor<40x4096xf32>) -> tensor<40x4096xf32> + %1569 = tosa.reshape %1568 {new_shape = array} : (tensor<40x4096xf32>) -> tensor<1x40x4096xf32> + %1570 = tosa.reshape %1559 {new_shape = array} : (tensor<1x40x4096xf32>) -> tensor<1x40x32x128xf32> + %1571 = "tosa.const"() <{value = dense<[0, 2, 1, 3]> : tensor<4xi32>}> : () -> tensor<4xi32> + %1572 = tosa.transpose %1570, %1571 : (tensor<1x40x32x128xf32>, tensor<4xi32>) -> tensor<1x32x40x128xf32> + %1573 = tosa.reshape %1564 {new_shape = array} : (tensor<1x40x4096xf32>) -> tensor<1x40x32x128xf32> + %1574 = "tosa.const"() <{value = dense<[0, 2, 1, 3]> : tensor<4xi32>}> : () -> tensor<4xi32> + %1575 = tosa.transpose %1573, %1574 : (tensor<1x40x32x128xf32>, tensor<4xi32>) -> tensor<1x32x40x128xf32> + %1576 = tosa.reshape %1569 {new_shape = array} : (tensor<1x40x4096xf32>) -> tensor<1x40x32x128xf32> + %1577 = "tosa.const"() <{value = dense<[0, 2, 1, 3]> : tensor<4xi32>}> : () -> tensor<4xi32> + %1578 = tosa.transpose %1576, %1577 : (tensor<1x40x32x128xf32>, tensor<4xi32>) -> tensor<1x32x40x128xf32> + %1579 = tosa.reshape %45 {new_shape = array} : (tensor<1x40x128xf32>) -> tensor<1x1x40x128xf32> + %1580 = tosa.reshape %47 {new_shape = array} : (tensor<1x40x128xf32>) -> tensor<1x1x40x128xf32> + %1581 = tosa.mul %1572, %1579 {shift = 0 : i8} : (tensor<1x32x40x128xf32>, tensor<1x1x40x128xf32>) -> tensor<1x32x40x128xf32> + %extracted_slice_359 = tensor.extract_slice %1572[0, 0, 0, 0] [1, 32, 40, 64] [1, 1, 1, 1] : tensor<1x32x40x128xf32> to tensor<1x32x40x64xf32> + %extracted_slice_360 = tensor.extract_slice %1572[0, 0, 0, 64] [1, 32, 40, 64] [1, 1, 1, 1] : tensor<1x32x40x128xf32> to tensor<1x32x40x64xf32> + %1582 = tensor.empty() : tensor<1x32x40x64xf32> + %1583 = linalg.negf ins(%extracted_slice_360 : tensor<1x32x40x64xf32>) outs(%1582 : tensor<1x32x40x64xf32>) -> tensor<1x32x40x64xf32> + %1584 = tensor.empty() : tensor<1x32x40x128xf32> + %inserted_slice_361 = tensor.insert_slice %1583 
into %1584[0, 0, 0, 0] [1, 32, 40, 64] [1, 1, 1, 1] : tensor<1x32x40x64xf32> into tensor<1x32x40x128xf32> + %inserted_slice_362 = tensor.insert_slice %extracted_slice_359 into %inserted_slice_361[0, 0, 0, 64] [1, 32, 40, 64] [1, 1, 1, 1] : tensor<1x32x40x64xf32> into tensor<1x32x40x128xf32> + %1585 = tosa.mul %inserted_slice_362, %1580 {shift = 0 : i8} : (tensor<1x32x40x128xf32>, tensor<1x1x40x128xf32>) -> tensor<1x32x40x128xf32> + %1586 = tosa.add %1581, %1585 : (tensor<1x32x40x128xf32>, tensor<1x32x40x128xf32>) -> tensor<1x32x40x128xf32> + %1587 = tosa.mul %1575, %1579 {shift = 0 : i8} : (tensor<1x32x40x128xf32>, tensor<1x1x40x128xf32>) -> tensor<1x32x40x128xf32> + %extracted_slice_363 = tensor.extract_slice %1575[0, 0, 0, 0] [1, 32, 40, 64] [1, 1, 1, 1] : tensor<1x32x40x128xf32> to tensor<1x32x40x64xf32> + %extracted_slice_364 = tensor.extract_slice %1575[0, 0, 0, 64] [1, 32, 40, 64] [1, 1, 1, 1] : tensor<1x32x40x128xf32> to tensor<1x32x40x64xf32> + %1588 = tensor.empty() : tensor<1x32x40x64xf32> + %1589 = linalg.negf ins(%extracted_slice_364 : tensor<1x32x40x64xf32>) outs(%1588 : tensor<1x32x40x64xf32>) -> tensor<1x32x40x64xf32> + %1590 = tensor.empty() : tensor<1x32x40x128xf32> + %inserted_slice_365 = tensor.insert_slice %1589 into %1590[0, 0, 0, 0] [1, 32, 40, 64] [1, 1, 1, 1] : tensor<1x32x40x64xf32> into tensor<1x32x40x128xf32> + %inserted_slice_366 = tensor.insert_slice %extracted_slice_363 into %inserted_slice_365[0, 0, 0, 64] [1, 32, 40, 64] [1, 1, 1, 1] : tensor<1x32x40x64xf32> into tensor<1x32x40x128xf32> + %1591 = tosa.mul %inserted_slice_366, %1580 {shift = 0 : i8} : (tensor<1x32x40x128xf32>, tensor<1x1x40x128xf32>) -> tensor<1x32x40x128xf32> + %1592 = tosa.add %1587, %1591 : (tensor<1x32x40x128xf32>, tensor<1x32x40x128xf32>) -> tensor<1x32x40x128xf32> + %1593 = tosa.reshape %19 {new_shape = array} : (tensor<40x41xf32>) -> tensor<1x40x41xf32> + %1594 = tosa.reshape %1593 {new_shape = array} : (tensor<1x40x41xf32>) -> tensor<1x1x40x41xf32> + 
%extracted_slice_367 = tensor.extract_slice %1594[0, 0, 0, 0] [1, 1, 40, 41] [1, 1, 1, 1] : tensor<1x1x40x41xf32> to tensor<1x1x40x41xf32> + %extracted_slice_368 = tensor.extract_slice %extracted_slice_367[0, 0, 0, 0] [1, 1, 40, 41] [1, 1, 1, 1] : tensor<1x1x40x41xf32> to tensor<1x1x40x41xf32> + %1595 = "tosa.const"() <{value = dense<0.000000e+00> : tensor<1x1x40x41xf32>}> : () -> tensor<1x1x40x41xf32> + %1596 = tosa.add %extracted_slice_368, %1595 : (tensor<1x1x40x41xf32>, tensor<1x1x40x41xf32>) -> tensor<1x1x40x41xf32> + %extracted_slice_369 = tensor.extract_slice %1596[0, 0, 0, 0] [1, 1, 40, 41] [1, 1, 1, 1] : tensor<1x1x40x41xf32> to tensor<1x1x40x41xf32> + %extracted_slice_370 = tensor.extract_slice %extracted_slice_369[0, 0, 0, 0] [1, 1, 40, 41] [1, 1, 1, 1] : tensor<1x1x40x41xf32> to tensor<1x1x40x41xf32> + %extracted_slice_371 = tensor.extract_slice %extracted_slice_370[0, 0, 0, 0] [1, 1, 40, 41] [1, 1, 1, 1] : tensor<1x1x40x41xf32> to tensor<1x1x40x41xf32> + %extracted_slice_372 = tensor.extract_slice %extracted_slice_371[0, 0, 0, 0] [1, 1, 40, 40] [1, 1, 1, 1] : tensor<1x1x40x41xf32> to tensor<1x1x40x40xf32> + %cst_373 = arith.constant 0.000000e+00 : f32 + %splat_374 = tensor.splat %cst_373 : tensor<40x40xf32> + %1597 = tosa.reshape %extracted_slice_372 {new_shape = array} : (tensor<1x1x40x40xf32>) -> tensor<40x40xf32> + %1598 = tosa.add %splat_374, %1597 : (tensor<40x40xf32>, tensor<40x40xf32>) -> tensor<40x40xf32> + %1599 = "tosa.const"() <{value = dense<[0, 1, 3, 2]> : tensor<4xi32>}> : () -> tensor<4xi32> + %1600 = tosa.transpose %1592, %1599 : (tensor<1x32x40x128xf32>, tensor<4xi32>) -> tensor<1x32x128x40xf32> + %1601 = tosa.reshape %1586 {new_shape = array} : (tensor<1x32x40x128xf32>) -> tensor<32x40x128xf32> + %1602 = tosa.reshape %1600 {new_shape = array} : (tensor<1x32x128x40xf32>) -> tensor<32x128x40xf32> + %1603 = tosa.matmul %1601, %1602 : (tensor<32x40x128xf32>, tensor<32x128x40xf32>) -> tensor<32x40x40xf32> + %cst_375 = arith.constant 
0.0883883461 : f32 + %splat_376 = tensor.splat %cst_375 : tensor<32x40x40xf32> + %1604 = tosa.mul %1603, %splat_376 {shift = 0 : i8} : (tensor<32x40x40xf32>, tensor<32x40x40xf32>) -> tensor<32x40x40xf32> + %1605 = tosa.add %1604, %1598 : (tensor<32x40x40xf32>, tensor<40x40xf32>) -> tensor<32x40x40xf32> + %1606 = tosa.reduce_max %1605 {axis = 2 : i32} : (tensor<32x40x40xf32>) -> tensor<32x40x1xf32> + %1607 = tosa.sub %1605, %1606 : (tensor<32x40x40xf32>, tensor<32x40x1xf32>) -> tensor<32x40x40xf32> + %1608 = math.exp %1607 : tensor<32x40x40xf32> + %1609 = tosa.reduce_sum %1608 {axis = 2 : i32} : (tensor<32x40x40xf32>) -> tensor<32x40x1xf32> + %1610 = tosa.log %1609 : (tensor<32x40x1xf32>) -> tensor<32x40x1xf32> + %1611 = tosa.add %1606, %1610 : (tensor<32x40x1xf32>, tensor<32x40x1xf32>) -> tensor<32x40x1xf32> + %1612 = tosa.sub %1605, %1611 : (tensor<32x40x40xf32>, tensor<32x40x1xf32>) -> tensor<32x40x40xf32> + %1613 = math.exp %1612 : tensor<32x40x40xf32> + %1614 = tosa.reshape %1611 {new_shape = array} : (tensor<32x40x1xf32>) -> tensor<1x32x40xf32> + %1615 = tosa.reshape %1578 {new_shape = array} : (tensor<1x32x40x128xf32>) -> tensor<32x40x128xf32> + %1616 = tosa.matmul %1613, %1615 : (tensor<32x40x40xf32>, tensor<32x40x128xf32>) -> tensor<32x40x128xf32> + %1617 = tosa.reshape %1616 {new_shape = array} : (tensor<32x40x128xf32>) -> tensor<1x32x40x128xf32> + %1618 = "tosa.const"() <{value = dense<[0, 2, 1, 3]> : tensor<4xi32>}> : () -> tensor<4xi32> + %1619 = tosa.transpose %1617, %1618 : (tensor<1x32x40x128xf32>, tensor<4xi32>) -> tensor<1x40x32x128xf32> + %1620 = tosa.reshape %1619 {new_shape = array} : (tensor<1x40x32x128xf32>) -> tensor<1x40x4096xf32> + %1621 = "tosa.const"() <{value = dense<[1, 0]> : tensor<2xi32>}> : () -> tensor<2xi32> + %1622 = tosa.transpose %arg124, %1621 : (tensor<4096x4096xf32>, tensor<2xi32>) -> tensor<4096x4096xf32> + %1623 = tosa.reshape %1620 {new_shape = array} : (tensor<1x40x4096xf32>) -> tensor<40x4096xf32> + %cst_377 = 
arith.constant dense<0.000000e+00> : tensor<40x4096xf32> + %1624 = linalg.matmul {cast = #linalg.type_fn} ins(%1623, %1622 : tensor<40x4096xf32>, tensor<4096x4096xf32>) outs(%cst_377 : tensor<40x4096xf32>) -> tensor<40x4096xf32> + %1625 = tosa.reshape %1624 {new_shape = array} : (tensor<40x4096xf32>) -> tensor<1x40x4096xf32> + %1626 = tosa.add %1542, %1625 : (tensor<1x40x4096xf32>, tensor<1x40x4096xf32>) -> tensor<1x40x4096xf32> + %1627 = tensor.empty() : tensor<1x40x4096xf32> + %c2_i32_378 = arith.constant 2 : i32 + %1628 = linalg.generic {indexing_maps = [#map4, #map4], iterator_types = ["parallel", "parallel", "parallel"]} ins(%1626 : tensor<1x40x4096xf32>) outs(%1627 : tensor<1x40x4096xf32>) { + ^bb0(%in: f32, %out: f32): + %3745 = math.fpowi %in, %c2_i32_378 : f32, i32 + linalg.yield %3745 : f32 + } -> tensor<1x40x4096xf32> + %1629 = tosa.reduce_sum %1628 {axis = 2 : i32} : (tensor<1x40x4096xf32>) -> tensor<1x40x1xf32> + %1630 = "tosa.const"() <{value = dense<4.096000e+03> : tensor<1xf32>}> : () -> tensor<1xf32> + %1631 = tosa.reciprocal %1630 : (tensor<1xf32>) -> tensor<1xf32> + %1632 = tosa.mul %1631, %1629 {shift = 0 : i8} : (tensor<1xf32>, tensor<1x40x1xf32>) -> tensor<1x40x1xf32> + %1633 = "tosa.const"() <{value = dense<9.99999974E-6> : tensor<1x40x1xf32>}> : () -> tensor<1x40x1xf32> + %1634 = tosa.add %1632, %1633 : (tensor<1x40x1xf32>, tensor<1x40x1xf32>) -> tensor<1x40x1xf32> + %1635 = tosa.rsqrt %1634 : (tensor<1x40x1xf32>) -> tensor<1x40x1xf32> + %1636 = tosa.mul %1626, %1635 {shift = 0 : i8} : (tensor<1x40x4096xf32>, tensor<1x40x1xf32>) -> tensor<1x40x4096xf32> + %1637 = tosa.reshape %arg125 {new_shape = array} : (tensor<4096xf32>) -> tensor<1x1x4096xf32> + %1638 = tosa.mul %1637, %1636 {shift = 0 : i8} : (tensor<1x1x4096xf32>, tensor<1x40x4096xf32>) -> tensor<1x40x4096xf32> + %1639 = "tosa.const"() <{value = dense<[1, 0]> : tensor<2xi32>}> : () -> tensor<2xi32> + %1640 = tosa.transpose %arg126, %1639 : (tensor<11008x4096xf32>, tensor<2xi32>) -> 
tensor<4096x11008xf32> + %1641 = tosa.reshape %1638 {new_shape = array} : (tensor<1x40x4096xf32>) -> tensor<40x4096xf32> + %cst_379 = arith.constant dense<0.000000e+00> : tensor<40x11008xf32> + %1642 = linalg.matmul {cast = #linalg.type_fn} ins(%1641, %1640 : tensor<40x4096xf32>, tensor<4096x11008xf32>) outs(%cst_379 : tensor<40x11008xf32>) -> tensor<40x11008xf32> + %1643 = tosa.reshape %1642 {new_shape = array} : (tensor<40x11008xf32>) -> tensor<1x40x11008xf32> + %1644 = tosa.sigmoid %1643 : (tensor<1x40x11008xf32>) -> tensor<1x40x11008xf32> + %1645 = tosa.mul %1643, %1644 {shift = 0 : i8} : (tensor<1x40x11008xf32>, tensor<1x40x11008xf32>) -> tensor<1x40x11008xf32> + %1646 = "tosa.const"() <{value = dense<[1, 0]> : tensor<2xi32>}> : () -> tensor<2xi32> + %1647 = tosa.transpose %arg127, %1646 : (tensor<11008x4096xf32>, tensor<2xi32>) -> tensor<4096x11008xf32> + %1648 = tosa.reshape %1638 {new_shape = array} : (tensor<1x40x4096xf32>) -> tensor<40x4096xf32> + %cst_380 = arith.constant dense<0.000000e+00> : tensor<40x11008xf32> + %1649 = linalg.matmul {cast = #linalg.type_fn} ins(%1648, %1647 : tensor<40x4096xf32>, tensor<4096x11008xf32>) outs(%cst_380 : tensor<40x11008xf32>) -> tensor<40x11008xf32> + %1650 = tosa.reshape %1649 {new_shape = array} : (tensor<40x11008xf32>) -> tensor<1x40x11008xf32> + %1651 = tosa.mul %1645, %1650 {shift = 0 : i8} : (tensor<1x40x11008xf32>, tensor<1x40x11008xf32>) -> tensor<1x40x11008xf32> + %1652 = "tosa.const"() <{value = dense<[1, 0]> : tensor<2xi32>}> : () -> tensor<2xi32> + %1653 = tosa.transpose %arg128, %1652 : (tensor<4096x11008xf32>, tensor<2xi32>) -> tensor<11008x4096xf32> + %1654 = tosa.reshape %1651 {new_shape = array} : (tensor<1x40x11008xf32>) -> tensor<40x11008xf32> + %cst_381 = arith.constant dense<0.000000e+00> : tensor<40x4096xf32> + %1655 = linalg.matmul {cast = #linalg.type_fn} ins(%1654, %1653 : tensor<40x11008xf32>, tensor<11008x4096xf32>) outs(%cst_381 : tensor<40x4096xf32>) -> tensor<40x4096xf32> + %1656 = 
tosa.reshape %1655 {new_shape = array} : (tensor<40x4096xf32>) -> tensor<1x40x4096xf32> + %1657 = tosa.add %1626, %1656 : (tensor<1x40x4096xf32>, tensor<1x40x4096xf32>) -> tensor<1x40x4096xf32> + %1658 = tensor.empty() : tensor<1x40x4096xf32> + %c2_i32_382 = arith.constant 2 : i32 + %1659 = linalg.generic {indexing_maps = [#map4, #map4], iterator_types = ["parallel", "parallel", "parallel"]} ins(%1657 : tensor<1x40x4096xf32>) outs(%1658 : tensor<1x40x4096xf32>) { + ^bb0(%in: f32, %out: f32): + %3745 = math.fpowi %in, %c2_i32_382 : f32, i32 + linalg.yield %3745 : f32 + } -> tensor<1x40x4096xf32> + %1660 = tosa.reduce_sum %1659 {axis = 2 : i32} : (tensor<1x40x4096xf32>) -> tensor<1x40x1xf32> + %1661 = "tosa.const"() <{value = dense<4.096000e+03> : tensor<1xf32>}> : () -> tensor<1xf32> + %1662 = tosa.reciprocal %1661 : (tensor<1xf32>) -> tensor<1xf32> + %1663 = tosa.mul %1662, %1660 {shift = 0 : i8} : (tensor<1xf32>, tensor<1x40x1xf32>) -> tensor<1x40x1xf32> + %1664 = "tosa.const"() <{value = dense<9.99999974E-6> : tensor<1x40x1xf32>}> : () -> tensor<1x40x1xf32> + %1665 = tosa.add %1663, %1664 : (tensor<1x40x1xf32>, tensor<1x40x1xf32>) -> tensor<1x40x1xf32> + %1666 = tosa.rsqrt %1665 : (tensor<1x40x1xf32>) -> tensor<1x40x1xf32> + %1667 = tosa.mul %1657, %1666 {shift = 0 : i8} : (tensor<1x40x4096xf32>, tensor<1x40x1xf32>) -> tensor<1x40x4096xf32> + %1668 = tosa.reshape %arg129 {new_shape = array} : (tensor<4096xf32>) -> tensor<1x1x4096xf32> + %1669 = tosa.mul %1668, %1667 {shift = 0 : i8} : (tensor<1x1x4096xf32>, tensor<1x40x4096xf32>) -> tensor<1x40x4096xf32> + %1670 = "tosa.const"() <{value = dense<[1, 0]> : tensor<2xi32>}> : () -> tensor<2xi32> + %1671 = tosa.transpose %arg130, %1670 : (tensor<4096x4096xf32>, tensor<2xi32>) -> tensor<4096x4096xf32> + %1672 = tosa.reshape %1669 {new_shape = array} : (tensor<1x40x4096xf32>) -> tensor<40x4096xf32> + %cst_383 = arith.constant dense<0.000000e+00> : tensor<40x4096xf32> + %1673 = linalg.matmul {cast = #linalg.type_fn} 
ins(%1672, %1671 : tensor<40x4096xf32>, tensor<4096x4096xf32>) outs(%cst_383 : tensor<40x4096xf32>) -> tensor<40x4096xf32> + %1674 = tosa.reshape %1673 {new_shape = array} : (tensor<40x4096xf32>) -> tensor<1x40x4096xf32> + %1675 = "tosa.const"() <{value = dense<[1, 0]> : tensor<2xi32>}> : () -> tensor<2xi32> + %1676 = tosa.transpose %arg131, %1675 : (tensor<4096x4096xf32>, tensor<2xi32>) -> tensor<4096x4096xf32> + %1677 = tosa.reshape %1669 {new_shape = array} : (tensor<1x40x4096xf32>) -> tensor<40x4096xf32> + %cst_384 = arith.constant dense<0.000000e+00> : tensor<40x4096xf32> + %1678 = linalg.matmul {cast = #linalg.type_fn} ins(%1677, %1676 : tensor<40x4096xf32>, tensor<4096x4096xf32>) outs(%cst_384 : tensor<40x4096xf32>) -> tensor<40x4096xf32> + %1679 = tosa.reshape %1678 {new_shape = array} : (tensor<40x4096xf32>) -> tensor<1x40x4096xf32> + %1680 = "tosa.const"() <{value = dense<[1, 0]> : tensor<2xi32>}> : () -> tensor<2xi32> + %1681 = tosa.transpose %arg132, %1680 : (tensor<4096x4096xf32>, tensor<2xi32>) -> tensor<4096x4096xf32> + %1682 = tosa.reshape %1669 {new_shape = array} : (tensor<1x40x4096xf32>) -> tensor<40x4096xf32> + %cst_385 = arith.constant dense<0.000000e+00> : tensor<40x4096xf32> + %1683 = linalg.matmul {cast = #linalg.type_fn} ins(%1682, %1681 : tensor<40x4096xf32>, tensor<4096x4096xf32>) outs(%cst_385 : tensor<40x4096xf32>) -> tensor<40x4096xf32> + %1684 = tosa.reshape %1683 {new_shape = array} : (tensor<40x4096xf32>) -> tensor<1x40x4096xf32> + %1685 = tosa.reshape %1674 {new_shape = array} : (tensor<1x40x4096xf32>) -> tensor<1x40x32x128xf32> + %1686 = "tosa.const"() <{value = dense<[0, 2, 1, 3]> : tensor<4xi32>}> : () -> tensor<4xi32> + %1687 = tosa.transpose %1685, %1686 : (tensor<1x40x32x128xf32>, tensor<4xi32>) -> tensor<1x32x40x128xf32> + %1688 = tosa.reshape %1679 {new_shape = array} : (tensor<1x40x4096xf32>) -> tensor<1x40x32x128xf32> + %1689 = "tosa.const"() <{value = dense<[0, 2, 1, 3]> : tensor<4xi32>}> : () -> tensor<4xi32> + %1690 = 
tosa.transpose %1688, %1689 : (tensor<1x40x32x128xf32>, tensor<4xi32>) -> tensor<1x32x40x128xf32> + %1691 = tosa.reshape %1684 {new_shape = array} : (tensor<1x40x4096xf32>) -> tensor<1x40x32x128xf32> + %1692 = "tosa.const"() <{value = dense<[0, 2, 1, 3]> : tensor<4xi32>}> : () -> tensor<4xi32> + %1693 = tosa.transpose %1691, %1692 : (tensor<1x40x32x128xf32>, tensor<4xi32>) -> tensor<1x32x40x128xf32> + %1694 = tosa.reshape %45 {new_shape = array} : (tensor<1x40x128xf32>) -> tensor<1x1x40x128xf32> + %1695 = tosa.reshape %47 {new_shape = array} : (tensor<1x40x128xf32>) -> tensor<1x1x40x128xf32> + %1696 = tosa.mul %1687, %1694 {shift = 0 : i8} : (tensor<1x32x40x128xf32>, tensor<1x1x40x128xf32>) -> tensor<1x32x40x128xf32> + %extracted_slice_386 = tensor.extract_slice %1687[0, 0, 0, 0] [1, 32, 40, 64] [1, 1, 1, 1] : tensor<1x32x40x128xf32> to tensor<1x32x40x64xf32> + %extracted_slice_387 = tensor.extract_slice %1687[0, 0, 0, 64] [1, 32, 40, 64] [1, 1, 1, 1] : tensor<1x32x40x128xf32> to tensor<1x32x40x64xf32> + %1697 = tensor.empty() : tensor<1x32x40x64xf32> + %1698 = linalg.negf ins(%extracted_slice_387 : tensor<1x32x40x64xf32>) outs(%1697 : tensor<1x32x40x64xf32>) -> tensor<1x32x40x64xf32> + %1699 = tensor.empty() : tensor<1x32x40x128xf32> + %inserted_slice_388 = tensor.insert_slice %1698 into %1699[0, 0, 0, 0] [1, 32, 40, 64] [1, 1, 1, 1] : tensor<1x32x40x64xf32> into tensor<1x32x40x128xf32> + %inserted_slice_389 = tensor.insert_slice %extracted_slice_386 into %inserted_slice_388[0, 0, 0, 64] [1, 32, 40, 64] [1, 1, 1, 1] : tensor<1x32x40x64xf32> into tensor<1x32x40x128xf32> + %1700 = tosa.mul %inserted_slice_389, %1695 {shift = 0 : i8} : (tensor<1x32x40x128xf32>, tensor<1x1x40x128xf32>) -> tensor<1x32x40x128xf32> + %1701 = tosa.add %1696, %1700 : (tensor<1x32x40x128xf32>, tensor<1x32x40x128xf32>) -> tensor<1x32x40x128xf32> + %1702 = tosa.mul %1690, %1694 {shift = 0 : i8} : (tensor<1x32x40x128xf32>, tensor<1x1x40x128xf32>) -> tensor<1x32x40x128xf32> + 
%extracted_slice_390 = tensor.extract_slice %1690[0, 0, 0, 0] [1, 32, 40, 64] [1, 1, 1, 1] : tensor<1x32x40x128xf32> to tensor<1x32x40x64xf32> + %extracted_slice_391 = tensor.extract_slice %1690[0, 0, 0, 64] [1, 32, 40, 64] [1, 1, 1, 1] : tensor<1x32x40x128xf32> to tensor<1x32x40x64xf32> + %1703 = tensor.empty() : tensor<1x32x40x64xf32> + %1704 = linalg.negf ins(%extracted_slice_391 : tensor<1x32x40x64xf32>) outs(%1703 : tensor<1x32x40x64xf32>) -> tensor<1x32x40x64xf32> + %1705 = tensor.empty() : tensor<1x32x40x128xf32> + %inserted_slice_392 = tensor.insert_slice %1704 into %1705[0, 0, 0, 0] [1, 32, 40, 64] [1, 1, 1, 1] : tensor<1x32x40x64xf32> into tensor<1x32x40x128xf32> + %inserted_slice_393 = tensor.insert_slice %extracted_slice_390 into %inserted_slice_392[0, 0, 0, 64] [1, 32, 40, 64] [1, 1, 1, 1] : tensor<1x32x40x64xf32> into tensor<1x32x40x128xf32> + %1706 = tosa.mul %inserted_slice_393, %1695 {shift = 0 : i8} : (tensor<1x32x40x128xf32>, tensor<1x1x40x128xf32>) -> tensor<1x32x40x128xf32> + %1707 = tosa.add %1702, %1706 : (tensor<1x32x40x128xf32>, tensor<1x32x40x128xf32>) -> tensor<1x32x40x128xf32> + %1708 = tosa.reshape %19 {new_shape = array} : (tensor<40x41xf32>) -> tensor<1x40x41xf32> + %1709 = tosa.reshape %1708 {new_shape = array} : (tensor<1x40x41xf32>) -> tensor<1x1x40x41xf32> + %extracted_slice_394 = tensor.extract_slice %1709[0, 0, 0, 0] [1, 1, 40, 41] [1, 1, 1, 1] : tensor<1x1x40x41xf32> to tensor<1x1x40x41xf32> + %extracted_slice_395 = tensor.extract_slice %extracted_slice_394[0, 0, 0, 0] [1, 1, 40, 41] [1, 1, 1, 1] : tensor<1x1x40x41xf32> to tensor<1x1x40x41xf32> + %1710 = "tosa.const"() <{value = dense<0.000000e+00> : tensor<1x1x40x41xf32>}> : () -> tensor<1x1x40x41xf32> + %1711 = tosa.add %extracted_slice_395, %1710 : (tensor<1x1x40x41xf32>, tensor<1x1x40x41xf32>) -> tensor<1x1x40x41xf32> + %extracted_slice_396 = tensor.extract_slice %1711[0, 0, 0, 0] [1, 1, 40, 41] [1, 1, 1, 1] : tensor<1x1x40x41xf32> to tensor<1x1x40x41xf32> + 
%extracted_slice_397 = tensor.extract_slice %extracted_slice_396[0, 0, 0, 0] [1, 1, 40, 41] [1, 1, 1, 1] : tensor<1x1x40x41xf32> to tensor<1x1x40x41xf32> + %extracted_slice_398 = tensor.extract_slice %extracted_slice_397[0, 0, 0, 0] [1, 1, 40, 41] [1, 1, 1, 1] : tensor<1x1x40x41xf32> to tensor<1x1x40x41xf32> + %extracted_slice_399 = tensor.extract_slice %extracted_slice_398[0, 0, 0, 0] [1, 1, 40, 40] [1, 1, 1, 1] : tensor<1x1x40x41xf32> to tensor<1x1x40x40xf32> + %cst_400 = arith.constant 0.000000e+00 : f32 + %splat_401 = tensor.splat %cst_400 : tensor<40x40xf32> + %1712 = tosa.reshape %extracted_slice_399 {new_shape = array} : (tensor<1x1x40x40xf32>) -> tensor<40x40xf32> + %1713 = tosa.add %splat_401, %1712 : (tensor<40x40xf32>, tensor<40x40xf32>) -> tensor<40x40xf32> + %1714 = "tosa.const"() <{value = dense<[0, 1, 3, 2]> : tensor<4xi32>}> : () -> tensor<4xi32> + %1715 = tosa.transpose %1707, %1714 : (tensor<1x32x40x128xf32>, tensor<4xi32>) -> tensor<1x32x128x40xf32> + %1716 = tosa.reshape %1701 {new_shape = array} : (tensor<1x32x40x128xf32>) -> tensor<32x40x128xf32> + %1717 = tosa.reshape %1715 {new_shape = array} : (tensor<1x32x128x40xf32>) -> tensor<32x128x40xf32> + %1718 = tosa.matmul %1716, %1717 : (tensor<32x40x128xf32>, tensor<32x128x40xf32>) -> tensor<32x40x40xf32> + %cst_402 = arith.constant 0.0883883461 : f32 + %splat_403 = tensor.splat %cst_402 : tensor<32x40x40xf32> + %1719 = tosa.mul %1718, %splat_403 {shift = 0 : i8} : (tensor<32x40x40xf32>, tensor<32x40x40xf32>) -> tensor<32x40x40xf32> + %1720 = tosa.add %1719, %1713 : (tensor<32x40x40xf32>, tensor<40x40xf32>) -> tensor<32x40x40xf32> + %1721 = tosa.reduce_max %1720 {axis = 2 : i32} : (tensor<32x40x40xf32>) -> tensor<32x40x1xf32> + %1722 = tosa.sub %1720, %1721 : (tensor<32x40x40xf32>, tensor<32x40x1xf32>) -> tensor<32x40x40xf32> + %1723 = math.exp %1722 : tensor<32x40x40xf32> + %1724 = tosa.reduce_sum %1723 {axis = 2 : i32} : (tensor<32x40x40xf32>) -> tensor<32x40x1xf32> + %1725 = tosa.log %1724 : 
(tensor<32x40x1xf32>) -> tensor<32x40x1xf32> + %1726 = tosa.add %1721, %1725 : (tensor<32x40x1xf32>, tensor<32x40x1xf32>) -> tensor<32x40x1xf32> + %1727 = tosa.sub %1720, %1726 : (tensor<32x40x40xf32>, tensor<32x40x1xf32>) -> tensor<32x40x40xf32> + %1728 = math.exp %1727 : tensor<32x40x40xf32> + %1729 = tosa.reshape %1726 {new_shape = array} : (tensor<32x40x1xf32>) -> tensor<1x32x40xf32> + %1730 = tosa.reshape %1693 {new_shape = array} : (tensor<1x32x40x128xf32>) -> tensor<32x40x128xf32> + %1731 = tosa.matmul %1728, %1730 : (tensor<32x40x40xf32>, tensor<32x40x128xf32>) -> tensor<32x40x128xf32> + %1732 = tosa.reshape %1731 {new_shape = array} : (tensor<32x40x128xf32>) -> tensor<1x32x40x128xf32> + %1733 = "tosa.const"() <{value = dense<[0, 2, 1, 3]> : tensor<4xi32>}> : () -> tensor<4xi32> + %1734 = tosa.transpose %1732, %1733 : (tensor<1x32x40x128xf32>, tensor<4xi32>) -> tensor<1x40x32x128xf32> + %1735 = tosa.reshape %1734 {new_shape = array} : (tensor<1x40x32x128xf32>) -> tensor<1x40x4096xf32> + %1736 = "tosa.const"() <{value = dense<[1, 0]> : tensor<2xi32>}> : () -> tensor<2xi32> + %1737 = tosa.transpose %arg133, %1736 : (tensor<4096x4096xf32>, tensor<2xi32>) -> tensor<4096x4096xf32> + %1738 = tosa.reshape %1735 {new_shape = array} : (tensor<1x40x4096xf32>) -> tensor<40x4096xf32> + %cst_404 = arith.constant dense<0.000000e+00> : tensor<40x4096xf32> + %1739 = linalg.matmul {cast = #linalg.type_fn} ins(%1738, %1737 : tensor<40x4096xf32>, tensor<4096x4096xf32>) outs(%cst_404 : tensor<40x4096xf32>) -> tensor<40x4096xf32> + %1740 = tosa.reshape %1739 {new_shape = array} : (tensor<40x4096xf32>) -> tensor<1x40x4096xf32> + %1741 = tosa.add %1657, %1740 : (tensor<1x40x4096xf32>, tensor<1x40x4096xf32>) -> tensor<1x40x4096xf32> + %1742 = tensor.empty() : tensor<1x40x4096xf32> + %c2_i32_405 = arith.constant 2 : i32 + %1743 = linalg.generic {indexing_maps = [#map4, #map4], iterator_types = ["parallel", "parallel", "parallel"]} ins(%1741 : tensor<1x40x4096xf32>) outs(%1742 : 
tensor<1x40x4096xf32>) { + ^bb0(%in: f32, %out: f32): + %3745 = math.fpowi %in, %c2_i32_405 : f32, i32 + linalg.yield %3745 : f32 + } -> tensor<1x40x4096xf32> + %1744 = tosa.reduce_sum %1743 {axis = 2 : i32} : (tensor<1x40x4096xf32>) -> tensor<1x40x1xf32> + %1745 = "tosa.const"() <{value = dense<4.096000e+03> : tensor<1xf32>}> : () -> tensor<1xf32> + %1746 = tosa.reciprocal %1745 : (tensor<1xf32>) -> tensor<1xf32> + %1747 = tosa.mul %1746, %1744 {shift = 0 : i8} : (tensor<1xf32>, tensor<1x40x1xf32>) -> tensor<1x40x1xf32> + %1748 = "tosa.const"() <{value = dense<9.99999974E-6> : tensor<1x40x1xf32>}> : () -> tensor<1x40x1xf32> + %1749 = tosa.add %1747, %1748 : (tensor<1x40x1xf32>, tensor<1x40x1xf32>) -> tensor<1x40x1xf32> + %1750 = tosa.rsqrt %1749 : (tensor<1x40x1xf32>) -> tensor<1x40x1xf32> + %1751 = tosa.mul %1741, %1750 {shift = 0 : i8} : (tensor<1x40x4096xf32>, tensor<1x40x1xf32>) -> tensor<1x40x4096xf32> + %1752 = tosa.reshape %arg134 {new_shape = array} : (tensor<4096xf32>) -> tensor<1x1x4096xf32> + %1753 = tosa.mul %1752, %1751 {shift = 0 : i8} : (tensor<1x1x4096xf32>, tensor<1x40x4096xf32>) -> tensor<1x40x4096xf32> + %1754 = "tosa.const"() <{value = dense<[1, 0]> : tensor<2xi32>}> : () -> tensor<2xi32> + %1755 = tosa.transpose %arg135, %1754 : (tensor<11008x4096xf32>, tensor<2xi32>) -> tensor<4096x11008xf32> + %1756 = tosa.reshape %1753 {new_shape = array} : (tensor<1x40x4096xf32>) -> tensor<40x4096xf32> + %cst_406 = arith.constant dense<0.000000e+00> : tensor<40x11008xf32> + %1757 = linalg.matmul {cast = #linalg.type_fn} ins(%1756, %1755 : tensor<40x4096xf32>, tensor<4096x11008xf32>) outs(%cst_406 : tensor<40x11008xf32>) -> tensor<40x11008xf32> + %1758 = tosa.reshape %1757 {new_shape = array} : (tensor<40x11008xf32>) -> tensor<1x40x11008xf32> + %1759 = tosa.sigmoid %1758 : (tensor<1x40x11008xf32>) -> tensor<1x40x11008xf32> + %1760 = tosa.mul %1758, %1759 {shift = 0 : i8} : (tensor<1x40x11008xf32>, tensor<1x40x11008xf32>) -> tensor<1x40x11008xf32> + %1761 = 
"tosa.const"() <{value = dense<[1, 0]> : tensor<2xi32>}> : () -> tensor<2xi32> + %1762 = tosa.transpose %arg136, %1761 : (tensor<11008x4096xf32>, tensor<2xi32>) -> tensor<4096x11008xf32> + %1763 = tosa.reshape %1753 {new_shape = array} : (tensor<1x40x4096xf32>) -> tensor<40x4096xf32> + %cst_407 = arith.constant dense<0.000000e+00> : tensor<40x11008xf32> + %1764 = linalg.matmul {cast = #linalg.type_fn} ins(%1763, %1762 : tensor<40x4096xf32>, tensor<4096x11008xf32>) outs(%cst_407 : tensor<40x11008xf32>) -> tensor<40x11008xf32> + %1765 = tosa.reshape %1764 {new_shape = array} : (tensor<40x11008xf32>) -> tensor<1x40x11008xf32> + %1766 = tosa.mul %1760, %1765 {shift = 0 : i8} : (tensor<1x40x11008xf32>, tensor<1x40x11008xf32>) -> tensor<1x40x11008xf32> + %1767 = "tosa.const"() <{value = dense<[1, 0]> : tensor<2xi32>}> : () -> tensor<2xi32> + %1768 = tosa.transpose %arg137, %1767 : (tensor<4096x11008xf32>, tensor<2xi32>) -> tensor<11008x4096xf32> + %1769 = tosa.reshape %1766 {new_shape = array} : (tensor<1x40x11008xf32>) -> tensor<40x11008xf32> + %cst_408 = arith.constant dense<0.000000e+00> : tensor<40x4096xf32> + %1770 = linalg.matmul {cast = #linalg.type_fn} ins(%1769, %1768 : tensor<40x11008xf32>, tensor<11008x4096xf32>) outs(%cst_408 : tensor<40x4096xf32>) -> tensor<40x4096xf32> + %1771 = tosa.reshape %1770 {new_shape = array} : (tensor<40x4096xf32>) -> tensor<1x40x4096xf32> + %1772 = tosa.add %1741, %1771 : (tensor<1x40x4096xf32>, tensor<1x40x4096xf32>) -> tensor<1x40x4096xf32> + %1773 = tensor.empty() : tensor<1x40x4096xf32> + %c2_i32_409 = arith.constant 2 : i32 + %1774 = linalg.generic {indexing_maps = [#map4, #map4], iterator_types = ["parallel", "parallel", "parallel"]} ins(%1772 : tensor<1x40x4096xf32>) outs(%1773 : tensor<1x40x4096xf32>) { + ^bb0(%in: f32, %out: f32): + %3745 = math.fpowi %in, %c2_i32_409 : f32, i32 + linalg.yield %3745 : f32 + } -> tensor<1x40x4096xf32> + %1775 = tosa.reduce_sum %1774 {axis = 2 : i32} : (tensor<1x40x4096xf32>) -> 
tensor<1x40x1xf32> + %1776 = "tosa.const"() <{value = dense<4.096000e+03> : tensor<1xf32>}> : () -> tensor<1xf32> + %1777 = tosa.reciprocal %1776 : (tensor<1xf32>) -> tensor<1xf32> + %1778 = tosa.mul %1777, %1775 {shift = 0 : i8} : (tensor<1xf32>, tensor<1x40x1xf32>) -> tensor<1x40x1xf32> + %1779 = "tosa.const"() <{value = dense<9.99999974E-6> : tensor<1x40x1xf32>}> : () -> tensor<1x40x1xf32> + %1780 = tosa.add %1778, %1779 : (tensor<1x40x1xf32>, tensor<1x40x1xf32>) -> tensor<1x40x1xf32> + %1781 = tosa.rsqrt %1780 : (tensor<1x40x1xf32>) -> tensor<1x40x1xf32> + %1782 = tosa.mul %1772, %1781 {shift = 0 : i8} : (tensor<1x40x4096xf32>, tensor<1x40x1xf32>) -> tensor<1x40x4096xf32> + %1783 = tosa.reshape %arg138 {new_shape = array} : (tensor<4096xf32>) -> tensor<1x1x4096xf32> + %1784 = tosa.mul %1783, %1782 {shift = 0 : i8} : (tensor<1x1x4096xf32>, tensor<1x40x4096xf32>) -> tensor<1x40x4096xf32> + %1785 = "tosa.const"() <{value = dense<[1, 0]> : tensor<2xi32>}> : () -> tensor<2xi32> + %1786 = tosa.transpose %arg139, %1785 : (tensor<4096x4096xf32>, tensor<2xi32>) -> tensor<4096x4096xf32> + %1787 = tosa.reshape %1784 {new_shape = array} : (tensor<1x40x4096xf32>) -> tensor<40x4096xf32> + %cst_410 = arith.constant dense<0.000000e+00> : tensor<40x4096xf32> + %1788 = linalg.matmul {cast = #linalg.type_fn} ins(%1787, %1786 : tensor<40x4096xf32>, tensor<4096x4096xf32>) outs(%cst_410 : tensor<40x4096xf32>) -> tensor<40x4096xf32> + %1789 = tosa.reshape %1788 {new_shape = array} : (tensor<40x4096xf32>) -> tensor<1x40x4096xf32> + %1790 = "tosa.const"() <{value = dense<[1, 0]> : tensor<2xi32>}> : () -> tensor<2xi32> + %1791 = tosa.transpose %arg140, %1790 : (tensor<4096x4096xf32>, tensor<2xi32>) -> tensor<4096x4096xf32> + %1792 = tosa.reshape %1784 {new_shape = array} : (tensor<1x40x4096xf32>) -> tensor<40x4096xf32> + %cst_411 = arith.constant dense<0.000000e+00> : tensor<40x4096xf32> + %1793 = linalg.matmul {cast = #linalg.type_fn} ins(%1792, %1791 : tensor<40x4096xf32>, 
tensor<4096x4096xf32>) outs(%cst_411 : tensor<40x4096xf32>) -> tensor<40x4096xf32> + %1794 = tosa.reshape %1793 {new_shape = array} : (tensor<40x4096xf32>) -> tensor<1x40x4096xf32> + %1795 = "tosa.const"() <{value = dense<[1, 0]> : tensor<2xi32>}> : () -> tensor<2xi32> + %1796 = tosa.transpose %arg141, %1795 : (tensor<4096x4096xf32>, tensor<2xi32>) -> tensor<4096x4096xf32> + %1797 = tosa.reshape %1784 {new_shape = array} : (tensor<1x40x4096xf32>) -> tensor<40x4096xf32> + %cst_412 = arith.constant dense<0.000000e+00> : tensor<40x4096xf32> + %1798 = linalg.matmul {cast = #linalg.type_fn} ins(%1797, %1796 : tensor<40x4096xf32>, tensor<4096x4096xf32>) outs(%cst_412 : tensor<40x4096xf32>) -> tensor<40x4096xf32> + %1799 = tosa.reshape %1798 {new_shape = array} : (tensor<40x4096xf32>) -> tensor<1x40x4096xf32> + %1800 = tosa.reshape %1789 {new_shape = array} : (tensor<1x40x4096xf32>) -> tensor<1x40x32x128xf32> + %1801 = "tosa.const"() <{value = dense<[0, 2, 1, 3]> : tensor<4xi32>}> : () -> tensor<4xi32> + %1802 = tosa.transpose %1800, %1801 : (tensor<1x40x32x128xf32>, tensor<4xi32>) -> tensor<1x32x40x128xf32> + %1803 = tosa.reshape %1794 {new_shape = array} : (tensor<1x40x4096xf32>) -> tensor<1x40x32x128xf32> + %1804 = "tosa.const"() <{value = dense<[0, 2, 1, 3]> : tensor<4xi32>}> : () -> tensor<4xi32> + %1805 = tosa.transpose %1803, %1804 : (tensor<1x40x32x128xf32>, tensor<4xi32>) -> tensor<1x32x40x128xf32> + %1806 = tosa.reshape %1799 {new_shape = array} : (tensor<1x40x4096xf32>) -> tensor<1x40x32x128xf32> + %1807 = "tosa.const"() <{value = dense<[0, 2, 1, 3]> : tensor<4xi32>}> : () -> tensor<4xi32> + %1808 = tosa.transpose %1806, %1807 : (tensor<1x40x32x128xf32>, tensor<4xi32>) -> tensor<1x32x40x128xf32> + %1809 = tosa.reshape %45 {new_shape = array} : (tensor<1x40x128xf32>) -> tensor<1x1x40x128xf32> + %1810 = tosa.reshape %47 {new_shape = array} : (tensor<1x40x128xf32>) -> tensor<1x1x40x128xf32> + %1811 = tosa.mul %1802, %1809 {shift = 0 : i8} : 
(tensor<1x32x40x128xf32>, tensor<1x1x40x128xf32>) -> tensor<1x32x40x128xf32> + %extracted_slice_413 = tensor.extract_slice %1802[0, 0, 0, 0] [1, 32, 40, 64] [1, 1, 1, 1] : tensor<1x32x40x128xf32> to tensor<1x32x40x64xf32> + %extracted_slice_414 = tensor.extract_slice %1802[0, 0, 0, 64] [1, 32, 40, 64] [1, 1, 1, 1] : tensor<1x32x40x128xf32> to tensor<1x32x40x64xf32> + %1812 = tensor.empty() : tensor<1x32x40x64xf32> + %1813 = linalg.negf ins(%extracted_slice_414 : tensor<1x32x40x64xf32>) outs(%1812 : tensor<1x32x40x64xf32>) -> tensor<1x32x40x64xf32> + %1814 = tensor.empty() : tensor<1x32x40x128xf32> + %inserted_slice_415 = tensor.insert_slice %1813 into %1814[0, 0, 0, 0] [1, 32, 40, 64] [1, 1, 1, 1] : tensor<1x32x40x64xf32> into tensor<1x32x40x128xf32> + %inserted_slice_416 = tensor.insert_slice %extracted_slice_413 into %inserted_slice_415[0, 0, 0, 64] [1, 32, 40, 64] [1, 1, 1, 1] : tensor<1x32x40x64xf32> into tensor<1x32x40x128xf32> + %1815 = tosa.mul %inserted_slice_416, %1810 {shift = 0 : i8} : (tensor<1x32x40x128xf32>, tensor<1x1x40x128xf32>) -> tensor<1x32x40x128xf32> + %1816 = tosa.add %1811, %1815 : (tensor<1x32x40x128xf32>, tensor<1x32x40x128xf32>) -> tensor<1x32x40x128xf32> + %1817 = tosa.mul %1805, %1809 {shift = 0 : i8} : (tensor<1x32x40x128xf32>, tensor<1x1x40x128xf32>) -> tensor<1x32x40x128xf32> + %extracted_slice_417 = tensor.extract_slice %1805[0, 0, 0, 0] [1, 32, 40, 64] [1, 1, 1, 1] : tensor<1x32x40x128xf32> to tensor<1x32x40x64xf32> + %extracted_slice_418 = tensor.extract_slice %1805[0, 0, 0, 64] [1, 32, 40, 64] [1, 1, 1, 1] : tensor<1x32x40x128xf32> to tensor<1x32x40x64xf32> + %1818 = tensor.empty() : tensor<1x32x40x64xf32> + %1819 = linalg.negf ins(%extracted_slice_418 : tensor<1x32x40x64xf32>) outs(%1818 : tensor<1x32x40x64xf32>) -> tensor<1x32x40x64xf32> + %1820 = tensor.empty() : tensor<1x32x40x128xf32> + %inserted_slice_419 = tensor.insert_slice %1819 into %1820[0, 0, 0, 0] [1, 32, 40, 64] [1, 1, 1, 1] : tensor<1x32x40x64xf32> into 
tensor<1x32x40x128xf32> + %inserted_slice_420 = tensor.insert_slice %extracted_slice_417 into %inserted_slice_419[0, 0, 0, 64] [1, 32, 40, 64] [1, 1, 1, 1] : tensor<1x32x40x64xf32> into tensor<1x32x40x128xf32> + %1821 = tosa.mul %inserted_slice_420, %1810 {shift = 0 : i8} : (tensor<1x32x40x128xf32>, tensor<1x1x40x128xf32>) -> tensor<1x32x40x128xf32> + %1822 = tosa.add %1817, %1821 : (tensor<1x32x40x128xf32>, tensor<1x32x40x128xf32>) -> tensor<1x32x40x128xf32> + %1823 = tosa.reshape %19 {new_shape = array} : (tensor<40x41xf32>) -> tensor<1x40x41xf32> + %1824 = tosa.reshape %1823 {new_shape = array} : (tensor<1x40x41xf32>) -> tensor<1x1x40x41xf32> + %extracted_slice_421 = tensor.extract_slice %1824[0, 0, 0, 0] [1, 1, 40, 41] [1, 1, 1, 1] : tensor<1x1x40x41xf32> to tensor<1x1x40x41xf32> + %extracted_slice_422 = tensor.extract_slice %extracted_slice_421[0, 0, 0, 0] [1, 1, 40, 41] [1, 1, 1, 1] : tensor<1x1x40x41xf32> to tensor<1x1x40x41xf32> + %1825 = "tosa.const"() <{value = dense<0.000000e+00> : tensor<1x1x40x41xf32>}> : () -> tensor<1x1x40x41xf32> + %1826 = tosa.add %extracted_slice_422, %1825 : (tensor<1x1x40x41xf32>, tensor<1x1x40x41xf32>) -> tensor<1x1x40x41xf32> + %extracted_slice_423 = tensor.extract_slice %1826[0, 0, 0, 0] [1, 1, 40, 41] [1, 1, 1, 1] : tensor<1x1x40x41xf32> to tensor<1x1x40x41xf32> + %extracted_slice_424 = tensor.extract_slice %extracted_slice_423[0, 0, 0, 0] [1, 1, 40, 41] [1, 1, 1, 1] : tensor<1x1x40x41xf32> to tensor<1x1x40x41xf32> + %extracted_slice_425 = tensor.extract_slice %extracted_slice_424[0, 0, 0, 0] [1, 1, 40, 41] [1, 1, 1, 1] : tensor<1x1x40x41xf32> to tensor<1x1x40x41xf32> + %extracted_slice_426 = tensor.extract_slice %extracted_slice_425[0, 0, 0, 0] [1, 1, 40, 40] [1, 1, 1, 1] : tensor<1x1x40x41xf32> to tensor<1x1x40x40xf32> + %cst_427 = arith.constant 0.000000e+00 : f32 + %splat_428 = tensor.splat %cst_427 : tensor<40x40xf32> + %1827 = tosa.reshape %extracted_slice_426 {new_shape = array} : (tensor<1x1x40x40xf32>) -> 
tensor<40x40xf32> + %1828 = tosa.add %splat_428, %1827 : (tensor<40x40xf32>, tensor<40x40xf32>) -> tensor<40x40xf32> + %1829 = "tosa.const"() <{value = dense<[0, 1, 3, 2]> : tensor<4xi32>}> : () -> tensor<4xi32> + %1830 = tosa.transpose %1822, %1829 : (tensor<1x32x40x128xf32>, tensor<4xi32>) -> tensor<1x32x128x40xf32> + %1831 = tosa.reshape %1816 {new_shape = array} : (tensor<1x32x40x128xf32>) -> tensor<32x40x128xf32> + %1832 = tosa.reshape %1830 {new_shape = array} : (tensor<1x32x128x40xf32>) -> tensor<32x128x40xf32> + %1833 = tosa.matmul %1831, %1832 : (tensor<32x40x128xf32>, tensor<32x128x40xf32>) -> tensor<32x40x40xf32> + %cst_429 = arith.constant 0.0883883461 : f32 + %splat_430 = tensor.splat %cst_429 : tensor<32x40x40xf32> + %1834 = tosa.mul %1833, %splat_430 {shift = 0 : i8} : (tensor<32x40x40xf32>, tensor<32x40x40xf32>) -> tensor<32x40x40xf32> + %1835 = tosa.add %1834, %1828 : (tensor<32x40x40xf32>, tensor<40x40xf32>) -> tensor<32x40x40xf32> + %1836 = tosa.reduce_max %1835 {axis = 2 : i32} : (tensor<32x40x40xf32>) -> tensor<32x40x1xf32> + %1837 = tosa.sub %1835, %1836 : (tensor<32x40x40xf32>, tensor<32x40x1xf32>) -> tensor<32x40x40xf32> + %1838 = math.exp %1837 : tensor<32x40x40xf32> + %1839 = tosa.reduce_sum %1838 {axis = 2 : i32} : (tensor<32x40x40xf32>) -> tensor<32x40x1xf32> + %1840 = tosa.log %1839 : (tensor<32x40x1xf32>) -> tensor<32x40x1xf32> + %1841 = tosa.add %1836, %1840 : (tensor<32x40x1xf32>, tensor<32x40x1xf32>) -> tensor<32x40x1xf32> + %1842 = tosa.sub %1835, %1841 : (tensor<32x40x40xf32>, tensor<32x40x1xf32>) -> tensor<32x40x40xf32> + %1843 = math.exp %1842 : tensor<32x40x40xf32> + %1844 = tosa.reshape %1841 {new_shape = array} : (tensor<32x40x1xf32>) -> tensor<1x32x40xf32> + %1845 = tosa.reshape %1808 {new_shape = array} : (tensor<1x32x40x128xf32>) -> tensor<32x40x128xf32> + %1846 = tosa.matmul %1843, %1845 : (tensor<32x40x40xf32>, tensor<32x40x128xf32>) -> tensor<32x40x128xf32> + %1847 = tosa.reshape %1846 {new_shape = array} : 
(tensor<32x40x128xf32>) -> tensor<1x32x40x128xf32> + %1848 = "tosa.const"() <{value = dense<[0, 2, 1, 3]> : tensor<4xi32>}> : () -> tensor<4xi32> + %1849 = tosa.transpose %1847, %1848 : (tensor<1x32x40x128xf32>, tensor<4xi32>) -> tensor<1x40x32x128xf32> + %1850 = tosa.reshape %1849 {new_shape = array} : (tensor<1x40x32x128xf32>) -> tensor<1x40x4096xf32> + %1851 = "tosa.const"() <{value = dense<[1, 0]> : tensor<2xi32>}> : () -> tensor<2xi32> + %1852 = tosa.transpose %arg142, %1851 : (tensor<4096x4096xf32>, tensor<2xi32>) -> tensor<4096x4096xf32> + %1853 = tosa.reshape %1850 {new_shape = array} : (tensor<1x40x4096xf32>) -> tensor<40x4096xf32> + %cst_431 = arith.constant dense<0.000000e+00> : tensor<40x4096xf32> + %1854 = linalg.matmul {cast = #linalg.type_fn} ins(%1853, %1852 : tensor<40x4096xf32>, tensor<4096x4096xf32>) outs(%cst_431 : tensor<40x4096xf32>) -> tensor<40x4096xf32> + %1855 = tosa.reshape %1854 {new_shape = array} : (tensor<40x4096xf32>) -> tensor<1x40x4096xf32> + %1856 = tosa.add %1772, %1855 : (tensor<1x40x4096xf32>, tensor<1x40x4096xf32>) -> tensor<1x40x4096xf32> + %1857 = tensor.empty() : tensor<1x40x4096xf32> + %c2_i32_432 = arith.constant 2 : i32 + %1858 = linalg.generic {indexing_maps = [#map4, #map4], iterator_types = ["parallel", "parallel", "parallel"]} ins(%1856 : tensor<1x40x4096xf32>) outs(%1857 : tensor<1x40x4096xf32>) { + ^bb0(%in: f32, %out: f32): + %3745 = math.fpowi %in, %c2_i32_432 : f32, i32 + linalg.yield %3745 : f32 + } -> tensor<1x40x4096xf32> + %1859 = tosa.reduce_sum %1858 {axis = 2 : i32} : (tensor<1x40x4096xf32>) -> tensor<1x40x1xf32> + %1860 = "tosa.const"() <{value = dense<4.096000e+03> : tensor<1xf32>}> : () -> tensor<1xf32> + %1861 = tosa.reciprocal %1860 : (tensor<1xf32>) -> tensor<1xf32> + %1862 = tosa.mul %1861, %1859 {shift = 0 : i8} : (tensor<1xf32>, tensor<1x40x1xf32>) -> tensor<1x40x1xf32> + %1863 = "tosa.const"() <{value = dense<9.99999974E-6> : tensor<1x40x1xf32>}> : () -> tensor<1x40x1xf32> + %1864 = tosa.add 
%1862, %1863 : (tensor<1x40x1xf32>, tensor<1x40x1xf32>) -> tensor<1x40x1xf32> + %1865 = tosa.rsqrt %1864 : (tensor<1x40x1xf32>) -> tensor<1x40x1xf32> + %1866 = tosa.mul %1856, %1865 {shift = 0 : i8} : (tensor<1x40x4096xf32>, tensor<1x40x1xf32>) -> tensor<1x40x4096xf32> + %1867 = tosa.reshape %arg143 {new_shape = array} : (tensor<4096xf32>) -> tensor<1x1x4096xf32> + %1868 = tosa.mul %1867, %1866 {shift = 0 : i8} : (tensor<1x1x4096xf32>, tensor<1x40x4096xf32>) -> tensor<1x40x4096xf32> + %1869 = "tosa.const"() <{value = dense<[1, 0]> : tensor<2xi32>}> : () -> tensor<2xi32> + %1870 = tosa.transpose %arg144, %1869 : (tensor<11008x4096xf32>, tensor<2xi32>) -> tensor<4096x11008xf32> + %1871 = tosa.reshape %1868 {new_shape = array} : (tensor<1x40x4096xf32>) -> tensor<40x4096xf32> + %cst_433 = arith.constant dense<0.000000e+00> : tensor<40x11008xf32> + %1872 = linalg.matmul {cast = #linalg.type_fn} ins(%1871, %1870 : tensor<40x4096xf32>, tensor<4096x11008xf32>) outs(%cst_433 : tensor<40x11008xf32>) -> tensor<40x11008xf32> + %1873 = tosa.reshape %1872 {new_shape = array} : (tensor<40x11008xf32>) -> tensor<1x40x11008xf32> + %1874 = tosa.sigmoid %1873 : (tensor<1x40x11008xf32>) -> tensor<1x40x11008xf32> + %1875 = tosa.mul %1873, %1874 {shift = 0 : i8} : (tensor<1x40x11008xf32>, tensor<1x40x11008xf32>) -> tensor<1x40x11008xf32> + %1876 = "tosa.const"() <{value = dense<[1, 0]> : tensor<2xi32>}> : () -> tensor<2xi32> + %1877 = tosa.transpose %arg145, %1876 : (tensor<11008x4096xf32>, tensor<2xi32>) -> tensor<4096x11008xf32> + %1878 = tosa.reshape %1868 {new_shape = array} : (tensor<1x40x4096xf32>) -> tensor<40x4096xf32> + %cst_434 = arith.constant dense<0.000000e+00> : tensor<40x11008xf32> + %1879 = linalg.matmul {cast = #linalg.type_fn} ins(%1878, %1877 : tensor<40x4096xf32>, tensor<4096x11008xf32>) outs(%cst_434 : tensor<40x11008xf32>) -> tensor<40x11008xf32> + %1880 = tosa.reshape %1879 {new_shape = array} : (tensor<40x11008xf32>) -> tensor<1x40x11008xf32> + %1881 = tosa.mul 
%1875, %1880 {shift = 0 : i8} : (tensor<1x40x11008xf32>, tensor<1x40x11008xf32>) -> tensor<1x40x11008xf32> + %1882 = "tosa.const"() <{value = dense<[1, 0]> : tensor<2xi32>}> : () -> tensor<2xi32> + %1883 = tosa.transpose %arg146, %1882 : (tensor<4096x11008xf32>, tensor<2xi32>) -> tensor<11008x4096xf32> + %1884 = tosa.reshape %1881 {new_shape = array} : (tensor<1x40x11008xf32>) -> tensor<40x11008xf32> + %cst_435 = arith.constant dense<0.000000e+00> : tensor<40x4096xf32> + %1885 = linalg.matmul {cast = #linalg.type_fn} ins(%1884, %1883 : tensor<40x11008xf32>, tensor<11008x4096xf32>) outs(%cst_435 : tensor<40x4096xf32>) -> tensor<40x4096xf32> + %1886 = tosa.reshape %1885 {new_shape = array} : (tensor<40x4096xf32>) -> tensor<1x40x4096xf32> + %1887 = tosa.add %1856, %1886 : (tensor<1x40x4096xf32>, tensor<1x40x4096xf32>) -> tensor<1x40x4096xf32> + %1888 = tensor.empty() : tensor<1x40x4096xf32> + %c2_i32_436 = arith.constant 2 : i32 + %1889 = linalg.generic {indexing_maps = [#map4, #map4], iterator_types = ["parallel", "parallel", "parallel"]} ins(%1887 : tensor<1x40x4096xf32>) outs(%1888 : tensor<1x40x4096xf32>) { + ^bb0(%in: f32, %out: f32): + %3745 = math.fpowi %in, %c2_i32_436 : f32, i32 + linalg.yield %3745 : f32 + } -> tensor<1x40x4096xf32> + %1890 = tosa.reduce_sum %1889 {axis = 2 : i32} : (tensor<1x40x4096xf32>) -> tensor<1x40x1xf32> + %1891 = "tosa.const"() <{value = dense<4.096000e+03> : tensor<1xf32>}> : () -> tensor<1xf32> + %1892 = tosa.reciprocal %1891 : (tensor<1xf32>) -> tensor<1xf32> + %1893 = tosa.mul %1892, %1890 {shift = 0 : i8} : (tensor<1xf32>, tensor<1x40x1xf32>) -> tensor<1x40x1xf32> + %1894 = "tosa.const"() <{value = dense<9.99999974E-6> : tensor<1x40x1xf32>}> : () -> tensor<1x40x1xf32> + %1895 = tosa.add %1893, %1894 : (tensor<1x40x1xf32>, tensor<1x40x1xf32>) -> tensor<1x40x1xf32> + %1896 = tosa.rsqrt %1895 : (tensor<1x40x1xf32>) -> tensor<1x40x1xf32> + %1897 = tosa.mul %1887, %1896 {shift = 0 : i8} : (tensor<1x40x4096xf32>, tensor<1x40x1xf32>) 
-> tensor<1x40x4096xf32> + %1898 = tosa.reshape %arg147 {new_shape = array} : (tensor<4096xf32>) -> tensor<1x1x4096xf32> + %1899 = tosa.mul %1898, %1897 {shift = 0 : i8} : (tensor<1x1x4096xf32>, tensor<1x40x4096xf32>) -> tensor<1x40x4096xf32> + %1900 = "tosa.const"() <{value = dense<[1, 0]> : tensor<2xi32>}> : () -> tensor<2xi32> + %1901 = tosa.transpose %arg148, %1900 : (tensor<4096x4096xf32>, tensor<2xi32>) -> tensor<4096x4096xf32> + %1902 = tosa.reshape %1899 {new_shape = array} : (tensor<1x40x4096xf32>) -> tensor<40x4096xf32> + %cst_437 = arith.constant dense<0.000000e+00> : tensor<40x4096xf32> + %1903 = linalg.matmul {cast = #linalg.type_fn} ins(%1902, %1901 : tensor<40x4096xf32>, tensor<4096x4096xf32>) outs(%cst_437 : tensor<40x4096xf32>) -> tensor<40x4096xf32> + %1904 = tosa.reshape %1903 {new_shape = array} : (tensor<40x4096xf32>) -> tensor<1x40x4096xf32> + %1905 = "tosa.const"() <{value = dense<[1, 0]> : tensor<2xi32>}> : () -> tensor<2xi32> + %1906 = tosa.transpose %arg149, %1905 : (tensor<4096x4096xf32>, tensor<2xi32>) -> tensor<4096x4096xf32> + %1907 = tosa.reshape %1899 {new_shape = array} : (tensor<1x40x4096xf32>) -> tensor<40x4096xf32> + %cst_438 = arith.constant dense<0.000000e+00> : tensor<40x4096xf32> + %1908 = linalg.matmul {cast = #linalg.type_fn} ins(%1907, %1906 : tensor<40x4096xf32>, tensor<4096x4096xf32>) outs(%cst_438 : tensor<40x4096xf32>) -> tensor<40x4096xf32> + %1909 = tosa.reshape %1908 {new_shape = array} : (tensor<40x4096xf32>) -> tensor<1x40x4096xf32> + %1910 = "tosa.const"() <{value = dense<[1, 0]> : tensor<2xi32>}> : () -> tensor<2xi32> + %1911 = tosa.transpose %arg150, %1910 : (tensor<4096x4096xf32>, tensor<2xi32>) -> tensor<4096x4096xf32> + %1912 = tosa.reshape %1899 {new_shape = array} : (tensor<1x40x4096xf32>) -> tensor<40x4096xf32> + %cst_439 = arith.constant dense<0.000000e+00> : tensor<40x4096xf32> + %1913 = linalg.matmul {cast = #linalg.type_fn} ins(%1912, %1911 : tensor<40x4096xf32>, tensor<4096x4096xf32>) outs(%cst_439 : 
tensor<40x4096xf32>) -> tensor<40x4096xf32> + %1914 = tosa.reshape %1913 {new_shape = array} : (tensor<40x4096xf32>) -> tensor<1x40x4096xf32> + %1915 = tosa.reshape %1904 {new_shape = array} : (tensor<1x40x4096xf32>) -> tensor<1x40x32x128xf32> + %1916 = "tosa.const"() <{value = dense<[0, 2, 1, 3]> : tensor<4xi32>}> : () -> tensor<4xi32> + %1917 = tosa.transpose %1915, %1916 : (tensor<1x40x32x128xf32>, tensor<4xi32>) -> tensor<1x32x40x128xf32> + %1918 = tosa.reshape %1909 {new_shape = array} : (tensor<1x40x4096xf32>) -> tensor<1x40x32x128xf32> + %1919 = "tosa.const"() <{value = dense<[0, 2, 1, 3]> : tensor<4xi32>}> : () -> tensor<4xi32> + %1920 = tosa.transpose %1918, %1919 : (tensor<1x40x32x128xf32>, tensor<4xi32>) -> tensor<1x32x40x128xf32> + %1921 = tosa.reshape %1914 {new_shape = array} : (tensor<1x40x4096xf32>) -> tensor<1x40x32x128xf32> + %1922 = "tosa.const"() <{value = dense<[0, 2, 1, 3]> : tensor<4xi32>}> : () -> tensor<4xi32> + %1923 = tosa.transpose %1921, %1922 : (tensor<1x40x32x128xf32>, tensor<4xi32>) -> tensor<1x32x40x128xf32> + %1924 = tosa.reshape %45 {new_shape = array} : (tensor<1x40x128xf32>) -> tensor<1x1x40x128xf32> + %1925 = tosa.reshape %47 {new_shape = array} : (tensor<1x40x128xf32>) -> tensor<1x1x40x128xf32> + %1926 = tosa.mul %1917, %1924 {shift = 0 : i8} : (tensor<1x32x40x128xf32>, tensor<1x1x40x128xf32>) -> tensor<1x32x40x128xf32> + %extracted_slice_440 = tensor.extract_slice %1917[0, 0, 0, 0] [1, 32, 40, 64] [1, 1, 1, 1] : tensor<1x32x40x128xf32> to tensor<1x32x40x64xf32> + %extracted_slice_441 = tensor.extract_slice %1917[0, 0, 0, 64] [1, 32, 40, 64] [1, 1, 1, 1] : tensor<1x32x40x128xf32> to tensor<1x32x40x64xf32> + %1927 = tensor.empty() : tensor<1x32x40x64xf32> + %1928 = linalg.negf ins(%extracted_slice_441 : tensor<1x32x40x64xf32>) outs(%1927 : tensor<1x32x40x64xf32>) -> tensor<1x32x40x64xf32> + %1929 = tensor.empty() : tensor<1x32x40x128xf32> + %inserted_slice_442 = tensor.insert_slice %1928 into %1929[0, 0, 0, 0] [1, 32, 40, 64] 
[1, 1, 1, 1] : tensor<1x32x40x64xf32> into tensor<1x32x40x128xf32> + %inserted_slice_443 = tensor.insert_slice %extracted_slice_440 into %inserted_slice_442[0, 0, 0, 64] [1, 32, 40, 64] [1, 1, 1, 1] : tensor<1x32x40x64xf32> into tensor<1x32x40x128xf32> + %1930 = tosa.mul %inserted_slice_443, %1925 {shift = 0 : i8} : (tensor<1x32x40x128xf32>, tensor<1x1x40x128xf32>) -> tensor<1x32x40x128xf32> + %1931 = tosa.add %1926, %1930 : (tensor<1x32x40x128xf32>, tensor<1x32x40x128xf32>) -> tensor<1x32x40x128xf32> + %1932 = tosa.mul %1920, %1924 {shift = 0 : i8} : (tensor<1x32x40x128xf32>, tensor<1x1x40x128xf32>) -> tensor<1x32x40x128xf32> + %extracted_slice_444 = tensor.extract_slice %1920[0, 0, 0, 0] [1, 32, 40, 64] [1, 1, 1, 1] : tensor<1x32x40x128xf32> to tensor<1x32x40x64xf32> + %extracted_slice_445 = tensor.extract_slice %1920[0, 0, 0, 64] [1, 32, 40, 64] [1, 1, 1, 1] : tensor<1x32x40x128xf32> to tensor<1x32x40x64xf32> + %1933 = tensor.empty() : tensor<1x32x40x64xf32> + %1934 = linalg.negf ins(%extracted_slice_445 : tensor<1x32x40x64xf32>) outs(%1933 : tensor<1x32x40x64xf32>) -> tensor<1x32x40x64xf32> + %1935 = tensor.empty() : tensor<1x32x40x128xf32> + %inserted_slice_446 = tensor.insert_slice %1934 into %1935[0, 0, 0, 0] [1, 32, 40, 64] [1, 1, 1, 1] : tensor<1x32x40x64xf32> into tensor<1x32x40x128xf32> + %inserted_slice_447 = tensor.insert_slice %extracted_slice_444 into %inserted_slice_446[0, 0, 0, 64] [1, 32, 40, 64] [1, 1, 1, 1] : tensor<1x32x40x64xf32> into tensor<1x32x40x128xf32> + %1936 = tosa.mul %inserted_slice_447, %1925 {shift = 0 : i8} : (tensor<1x32x40x128xf32>, tensor<1x1x40x128xf32>) -> tensor<1x32x40x128xf32> + %1937 = tosa.add %1932, %1936 : (tensor<1x32x40x128xf32>, tensor<1x32x40x128xf32>) -> tensor<1x32x40x128xf32> + %1938 = tosa.reshape %19 {new_shape = array} : (tensor<40x41xf32>) -> tensor<1x40x41xf32> + %1939 = tosa.reshape %1938 {new_shape = array} : (tensor<1x40x41xf32>) -> tensor<1x1x40x41xf32> + %extracted_slice_448 = tensor.extract_slice 
%1939[0, 0, 0, 0] [1, 1, 40, 41] [1, 1, 1, 1] : tensor<1x1x40x41xf32> to tensor<1x1x40x41xf32> + %extracted_slice_449 = tensor.extract_slice %extracted_slice_448[0, 0, 0, 0] [1, 1, 40, 41] [1, 1, 1, 1] : tensor<1x1x40x41xf32> to tensor<1x1x40x41xf32> + %1940 = "tosa.const"() <{value = dense<0.000000e+00> : tensor<1x1x40x41xf32>}> : () -> tensor<1x1x40x41xf32> + %1941 = tosa.add %extracted_slice_449, %1940 : (tensor<1x1x40x41xf32>, tensor<1x1x40x41xf32>) -> tensor<1x1x40x41xf32> + %extracted_slice_450 = tensor.extract_slice %1941[0, 0, 0, 0] [1, 1, 40, 41] [1, 1, 1, 1] : tensor<1x1x40x41xf32> to tensor<1x1x40x41xf32> + %extracted_slice_451 = tensor.extract_slice %extracted_slice_450[0, 0, 0, 0] [1, 1, 40, 41] [1, 1, 1, 1] : tensor<1x1x40x41xf32> to tensor<1x1x40x41xf32> + %extracted_slice_452 = tensor.extract_slice %extracted_slice_451[0, 0, 0, 0] [1, 1, 40, 41] [1, 1, 1, 1] : tensor<1x1x40x41xf32> to tensor<1x1x40x41xf32> + %extracted_slice_453 = tensor.extract_slice %extracted_slice_452[0, 0, 0, 0] [1, 1, 40, 40] [1, 1, 1, 1] : tensor<1x1x40x41xf32> to tensor<1x1x40x40xf32> + %cst_454 = arith.constant 0.000000e+00 : f32 + %splat_455 = tensor.splat %cst_454 : tensor<40x40xf32> + %1942 = tosa.reshape %extracted_slice_453 {new_shape = array} : (tensor<1x1x40x40xf32>) -> tensor<40x40xf32> + %1943 = tosa.add %splat_455, %1942 : (tensor<40x40xf32>, tensor<40x40xf32>) -> tensor<40x40xf32> + %1944 = "tosa.const"() <{value = dense<[0, 1, 3, 2]> : tensor<4xi32>}> : () -> tensor<4xi32> + %1945 = tosa.transpose %1937, %1944 : (tensor<1x32x40x128xf32>, tensor<4xi32>) -> tensor<1x32x128x40xf32> + %1946 = tosa.reshape %1931 {new_shape = array} : (tensor<1x32x40x128xf32>) -> tensor<32x40x128xf32> + %1947 = tosa.reshape %1945 {new_shape = array} : (tensor<1x32x128x40xf32>) -> tensor<32x128x40xf32> + %1948 = tosa.matmul %1946, %1947 : (tensor<32x40x128xf32>, tensor<32x128x40xf32>) -> tensor<32x40x40xf32> + %cst_456 = arith.constant 0.0883883461 : f32 + %splat_457 = tensor.splat 
%cst_456 : tensor<32x40x40xf32> + %1949 = tosa.mul %1948, %splat_457 {shift = 0 : i8} : (tensor<32x40x40xf32>, tensor<32x40x40xf32>) -> tensor<32x40x40xf32> + %1950 = tosa.add %1949, %1943 : (tensor<32x40x40xf32>, tensor<40x40xf32>) -> tensor<32x40x40xf32> + %1951 = tosa.reduce_max %1950 {axis = 2 : i32} : (tensor<32x40x40xf32>) -> tensor<32x40x1xf32> + %1952 = tosa.sub %1950, %1951 : (tensor<32x40x40xf32>, tensor<32x40x1xf32>) -> tensor<32x40x40xf32> + %1953 = math.exp %1952 : tensor<32x40x40xf32> + %1954 = tosa.reduce_sum %1953 {axis = 2 : i32} : (tensor<32x40x40xf32>) -> tensor<32x40x1xf32> + %1955 = tosa.log %1954 : (tensor<32x40x1xf32>) -> tensor<32x40x1xf32> + %1956 = tosa.add %1951, %1955 : (tensor<32x40x1xf32>, tensor<32x40x1xf32>) -> tensor<32x40x1xf32> + %1957 = tosa.sub %1950, %1956 : (tensor<32x40x40xf32>, tensor<32x40x1xf32>) -> tensor<32x40x40xf32> + %1958 = math.exp %1957 : tensor<32x40x40xf32> + %1959 = tosa.reshape %1956 {new_shape = array} : (tensor<32x40x1xf32>) -> tensor<1x32x40xf32> + %1960 = tosa.reshape %1923 {new_shape = array} : (tensor<1x32x40x128xf32>) -> tensor<32x40x128xf32> + %1961 = tosa.matmul %1958, %1960 : (tensor<32x40x40xf32>, tensor<32x40x128xf32>) -> tensor<32x40x128xf32> + %1962 = tosa.reshape %1961 {new_shape = array} : (tensor<32x40x128xf32>) -> tensor<1x32x40x128xf32> + %1963 = "tosa.const"() <{value = dense<[0, 2, 1, 3]> : tensor<4xi32>}> : () -> tensor<4xi32> + %1964 = tosa.transpose %1962, %1963 : (tensor<1x32x40x128xf32>, tensor<4xi32>) -> tensor<1x40x32x128xf32> + %1965 = tosa.reshape %1964 {new_shape = array} : (tensor<1x40x32x128xf32>) -> tensor<1x40x4096xf32> + %1966 = "tosa.const"() <{value = dense<[1, 0]> : tensor<2xi32>}> : () -> tensor<2xi32> + %1967 = tosa.transpose %arg151, %1966 : (tensor<4096x4096xf32>, tensor<2xi32>) -> tensor<4096x4096xf32> + %1968 = tosa.reshape %1965 {new_shape = array} : (tensor<1x40x4096xf32>) -> tensor<40x4096xf32> + %cst_458 = arith.constant dense<0.000000e+00> : tensor<40x4096xf32> 
+ %1969 = linalg.matmul {cast = #linalg.type_fn} ins(%1968, %1967 : tensor<40x4096xf32>, tensor<4096x4096xf32>) outs(%cst_458 : tensor<40x4096xf32>) -> tensor<40x4096xf32> + %1970 = tosa.reshape %1969 {new_shape = array} : (tensor<40x4096xf32>) -> tensor<1x40x4096xf32> + %1971 = tosa.add %1887, %1970 : (tensor<1x40x4096xf32>, tensor<1x40x4096xf32>) -> tensor<1x40x4096xf32> + %1972 = tensor.empty() : tensor<1x40x4096xf32> + %c2_i32_459 = arith.constant 2 : i32 + %1973 = linalg.generic {indexing_maps = [#map4, #map4], iterator_types = ["parallel", "parallel", "parallel"]} ins(%1971 : tensor<1x40x4096xf32>) outs(%1972 : tensor<1x40x4096xf32>) { + ^bb0(%in: f32, %out: f32): + %3745 = math.fpowi %in, %c2_i32_459 : f32, i32 + linalg.yield %3745 : f32 + } -> tensor<1x40x4096xf32> + %1974 = tosa.reduce_sum %1973 {axis = 2 : i32} : (tensor<1x40x4096xf32>) -> tensor<1x40x1xf32> + %1975 = "tosa.const"() <{value = dense<4.096000e+03> : tensor<1xf32>}> : () -> tensor<1xf32> + %1976 = tosa.reciprocal %1975 : (tensor<1xf32>) -> tensor<1xf32> + %1977 = tosa.mul %1976, %1974 {shift = 0 : i8} : (tensor<1xf32>, tensor<1x40x1xf32>) -> tensor<1x40x1xf32> + %1978 = "tosa.const"() <{value = dense<9.99999974E-6> : tensor<1x40x1xf32>}> : () -> tensor<1x40x1xf32> + %1979 = tosa.add %1977, %1978 : (tensor<1x40x1xf32>, tensor<1x40x1xf32>) -> tensor<1x40x1xf32> + %1980 = tosa.rsqrt %1979 : (tensor<1x40x1xf32>) -> tensor<1x40x1xf32> + %1981 = tosa.mul %1971, %1980 {shift = 0 : i8} : (tensor<1x40x4096xf32>, tensor<1x40x1xf32>) -> tensor<1x40x4096xf32> + %1982 = tosa.reshape %arg152 {new_shape = array} : (tensor<4096xf32>) -> tensor<1x1x4096xf32> + %1983 = tosa.mul %1982, %1981 {shift = 0 : i8} : (tensor<1x1x4096xf32>, tensor<1x40x4096xf32>) -> tensor<1x40x4096xf32> + %1984 = "tosa.const"() <{value = dense<[1, 0]> : tensor<2xi32>}> : () -> tensor<2xi32> + %1985 = tosa.transpose %arg153, %1984 : (tensor<11008x4096xf32>, tensor<2xi32>) -> tensor<4096x11008xf32> + %1986 = tosa.reshape %1983 
{new_shape = array} : (tensor<1x40x4096xf32>) -> tensor<40x4096xf32> + %cst_460 = arith.constant dense<0.000000e+00> : tensor<40x11008xf32> + %1987 = linalg.matmul {cast = #linalg.type_fn} ins(%1986, %1985 : tensor<40x4096xf32>, tensor<4096x11008xf32>) outs(%cst_460 : tensor<40x11008xf32>) -> tensor<40x11008xf32> + %1988 = tosa.reshape %1987 {new_shape = array} : (tensor<40x11008xf32>) -> tensor<1x40x11008xf32> + %1989 = tosa.sigmoid %1988 : (tensor<1x40x11008xf32>) -> tensor<1x40x11008xf32> + %1990 = tosa.mul %1988, %1989 {shift = 0 : i8} : (tensor<1x40x11008xf32>, tensor<1x40x11008xf32>) -> tensor<1x40x11008xf32> + %1991 = "tosa.const"() <{value = dense<[1, 0]> : tensor<2xi32>}> : () -> tensor<2xi32> + %1992 = tosa.transpose %arg154, %1991 : (tensor<11008x4096xf32>, tensor<2xi32>) -> tensor<4096x11008xf32> + %1993 = tosa.reshape %1983 {new_shape = array} : (tensor<1x40x4096xf32>) -> tensor<40x4096xf32> + %cst_461 = arith.constant dense<0.000000e+00> : tensor<40x11008xf32> + %1994 = linalg.matmul {cast = #linalg.type_fn} ins(%1993, %1992 : tensor<40x4096xf32>, tensor<4096x11008xf32>) outs(%cst_461 : tensor<40x11008xf32>) -> tensor<40x11008xf32> + %1995 = tosa.reshape %1994 {new_shape = array} : (tensor<40x11008xf32>) -> tensor<1x40x11008xf32> + %1996 = tosa.mul %1990, %1995 {shift = 0 : i8} : (tensor<1x40x11008xf32>, tensor<1x40x11008xf32>) -> tensor<1x40x11008xf32> + %1997 = "tosa.const"() <{value = dense<[1, 0]> : tensor<2xi32>}> : () -> tensor<2xi32> + %1998 = tosa.transpose %arg155, %1997 : (tensor<4096x11008xf32>, tensor<2xi32>) -> tensor<11008x4096xf32> + %1999 = tosa.reshape %1996 {new_shape = array} : (tensor<1x40x11008xf32>) -> tensor<40x11008xf32> + %cst_462 = arith.constant dense<0.000000e+00> : tensor<40x4096xf32> + %2000 = linalg.matmul {cast = #linalg.type_fn} ins(%1999, %1998 : tensor<40x11008xf32>, tensor<11008x4096xf32>) outs(%cst_462 : tensor<40x4096xf32>) -> tensor<40x4096xf32> + %2001 = tosa.reshape %2000 {new_shape = array} : 
(tensor<40x4096xf32>) -> tensor<1x40x4096xf32> + %2002 = tosa.add %1971, %2001 : (tensor<1x40x4096xf32>, tensor<1x40x4096xf32>) -> tensor<1x40x4096xf32> + %2003 = tensor.empty() : tensor<1x40x4096xf32> + %c2_i32_463 = arith.constant 2 : i32 + %2004 = linalg.generic {indexing_maps = [#map4, #map4], iterator_types = ["parallel", "parallel", "parallel"]} ins(%2002 : tensor<1x40x4096xf32>) outs(%2003 : tensor<1x40x4096xf32>) { + ^bb0(%in: f32, %out: f32): + %3745 = math.fpowi %in, %c2_i32_463 : f32, i32 + linalg.yield %3745 : f32 + } -> tensor<1x40x4096xf32> + %2005 = tosa.reduce_sum %2004 {axis = 2 : i32} : (tensor<1x40x4096xf32>) -> tensor<1x40x1xf32> + %2006 = "tosa.const"() <{value = dense<4.096000e+03> : tensor<1xf32>}> : () -> tensor<1xf32> + %2007 = tosa.reciprocal %2006 : (tensor<1xf32>) -> tensor<1xf32> + %2008 = tosa.mul %2007, %2005 {shift = 0 : i8} : (tensor<1xf32>, tensor<1x40x1xf32>) -> tensor<1x40x1xf32> + %2009 = "tosa.const"() <{value = dense<9.99999974E-6> : tensor<1x40x1xf32>}> : () -> tensor<1x40x1xf32> + %2010 = tosa.add %2008, %2009 : (tensor<1x40x1xf32>, tensor<1x40x1xf32>) -> tensor<1x40x1xf32> + %2011 = tosa.rsqrt %2010 : (tensor<1x40x1xf32>) -> tensor<1x40x1xf32> + %2012 = tosa.mul %2002, %2011 {shift = 0 : i8} : (tensor<1x40x4096xf32>, tensor<1x40x1xf32>) -> tensor<1x40x4096xf32> + %2013 = tosa.reshape %arg156 {new_shape = array} : (tensor<4096xf32>) -> tensor<1x1x4096xf32> + %2014 = tosa.mul %2013, %2012 {shift = 0 : i8} : (tensor<1x1x4096xf32>, tensor<1x40x4096xf32>) -> tensor<1x40x4096xf32> + %2015 = "tosa.const"() <{value = dense<[1, 0]> : tensor<2xi32>}> : () -> tensor<2xi32> + %2016 = tosa.transpose %arg157, %2015 : (tensor<4096x4096xf32>, tensor<2xi32>) -> tensor<4096x4096xf32> + %2017 = tosa.reshape %2014 {new_shape = array} : (tensor<1x40x4096xf32>) -> tensor<40x4096xf32> + %cst_464 = arith.constant dense<0.000000e+00> : tensor<40x4096xf32> + %2018 = linalg.matmul {cast = #linalg.type_fn} ins(%2017, %2016 : tensor<40x4096xf32>, 
tensor<4096x4096xf32>) outs(%cst_464 : tensor<40x4096xf32>) -> tensor<40x4096xf32> + %2019 = tosa.reshape %2018 {new_shape = array} : (tensor<40x4096xf32>) -> tensor<1x40x4096xf32> + %2020 = "tosa.const"() <{value = dense<[1, 0]> : tensor<2xi32>}> : () -> tensor<2xi32> + %2021 = tosa.transpose %arg158, %2020 : (tensor<4096x4096xf32>, tensor<2xi32>) -> tensor<4096x4096xf32> + %2022 = tosa.reshape %2014 {new_shape = array} : (tensor<1x40x4096xf32>) -> tensor<40x4096xf32> + %cst_465 = arith.constant dense<0.000000e+00> : tensor<40x4096xf32> + %2023 = linalg.matmul {cast = #linalg.type_fn} ins(%2022, %2021 : tensor<40x4096xf32>, tensor<4096x4096xf32>) outs(%cst_465 : tensor<40x4096xf32>) -> tensor<40x4096xf32> + %2024 = tosa.reshape %2023 {new_shape = array} : (tensor<40x4096xf32>) -> tensor<1x40x4096xf32> + %2025 = "tosa.const"() <{value = dense<[1, 0]> : tensor<2xi32>}> : () -> tensor<2xi32> + %2026 = tosa.transpose %arg159, %2025 : (tensor<4096x4096xf32>, tensor<2xi32>) -> tensor<4096x4096xf32> + %2027 = tosa.reshape %2014 {new_shape = array} : (tensor<1x40x4096xf32>) -> tensor<40x4096xf32> + %cst_466 = arith.constant dense<0.000000e+00> : tensor<40x4096xf32> + %2028 = linalg.matmul {cast = #linalg.type_fn} ins(%2027, %2026 : tensor<40x4096xf32>, tensor<4096x4096xf32>) outs(%cst_466 : tensor<40x4096xf32>) -> tensor<40x4096xf32> + %2029 = tosa.reshape %2028 {new_shape = array} : (tensor<40x4096xf32>) -> tensor<1x40x4096xf32> + %2030 = tosa.reshape %2019 {new_shape = array} : (tensor<1x40x4096xf32>) -> tensor<1x40x32x128xf32> + %2031 = "tosa.const"() <{value = dense<[0, 2, 1, 3]> : tensor<4xi32>}> : () -> tensor<4xi32> + %2032 = tosa.transpose %2030, %2031 : (tensor<1x40x32x128xf32>, tensor<4xi32>) -> tensor<1x32x40x128xf32> + %2033 = tosa.reshape %2024 {new_shape = array} : (tensor<1x40x4096xf32>) -> tensor<1x40x32x128xf32> + %2034 = "tosa.const"() <{value = dense<[0, 2, 1, 3]> : tensor<4xi32>}> : () -> tensor<4xi32> + %2035 = tosa.transpose %2033, %2034 : 
(tensor<1x40x32x128xf32>, tensor<4xi32>) -> tensor<1x32x40x128xf32> + %2036 = tosa.reshape %2029 {new_shape = array} : (tensor<1x40x4096xf32>) -> tensor<1x40x32x128xf32> + %2037 = "tosa.const"() <{value = dense<[0, 2, 1, 3]> : tensor<4xi32>}> : () -> tensor<4xi32> + %2038 = tosa.transpose %2036, %2037 : (tensor<1x40x32x128xf32>, tensor<4xi32>) -> tensor<1x32x40x128xf32> + %2039 = tosa.reshape %45 {new_shape = array} : (tensor<1x40x128xf32>) -> tensor<1x1x40x128xf32> + %2040 = tosa.reshape %47 {new_shape = array} : (tensor<1x40x128xf32>) -> tensor<1x1x40x128xf32> + %2041 = tosa.mul %2032, %2039 {shift = 0 : i8} : (tensor<1x32x40x128xf32>, tensor<1x1x40x128xf32>) -> tensor<1x32x40x128xf32> + %extracted_slice_467 = tensor.extract_slice %2032[0, 0, 0, 0] [1, 32, 40, 64] [1, 1, 1, 1] : tensor<1x32x40x128xf32> to tensor<1x32x40x64xf32> + %extracted_slice_468 = tensor.extract_slice %2032[0, 0, 0, 64] [1, 32, 40, 64] [1, 1, 1, 1] : tensor<1x32x40x128xf32> to tensor<1x32x40x64xf32> + %2042 = tensor.empty() : tensor<1x32x40x64xf32> + %2043 = linalg.negf ins(%extracted_slice_468 : tensor<1x32x40x64xf32>) outs(%2042 : tensor<1x32x40x64xf32>) -> tensor<1x32x40x64xf32> + %2044 = tensor.empty() : tensor<1x32x40x128xf32> + %inserted_slice_469 = tensor.insert_slice %2043 into %2044[0, 0, 0, 0] [1, 32, 40, 64] [1, 1, 1, 1] : tensor<1x32x40x64xf32> into tensor<1x32x40x128xf32> + %inserted_slice_470 = tensor.insert_slice %extracted_slice_467 into %inserted_slice_469[0, 0, 0, 64] [1, 32, 40, 64] [1, 1, 1, 1] : tensor<1x32x40x64xf32> into tensor<1x32x40x128xf32> + %2045 = tosa.mul %inserted_slice_470, %2040 {shift = 0 : i8} : (tensor<1x32x40x128xf32>, tensor<1x1x40x128xf32>) -> tensor<1x32x40x128xf32> + %2046 = tosa.add %2041, %2045 : (tensor<1x32x40x128xf32>, tensor<1x32x40x128xf32>) -> tensor<1x32x40x128xf32> + %2047 = tosa.mul %2035, %2039 {shift = 0 : i8} : (tensor<1x32x40x128xf32>, tensor<1x1x40x128xf32>) -> tensor<1x32x40x128xf32> + %extracted_slice_471 = tensor.extract_slice 
%2035[0, 0, 0, 0] [1, 32, 40, 64] [1, 1, 1, 1] : tensor<1x32x40x128xf32> to tensor<1x32x40x64xf32> + %extracted_slice_472 = tensor.extract_slice %2035[0, 0, 0, 64] [1, 32, 40, 64] [1, 1, 1, 1] : tensor<1x32x40x128xf32> to tensor<1x32x40x64xf32> + %2048 = tensor.empty() : tensor<1x32x40x64xf32> + %2049 = linalg.negf ins(%extracted_slice_472 : tensor<1x32x40x64xf32>) outs(%2048 : tensor<1x32x40x64xf32>) -> tensor<1x32x40x64xf32> + %2050 = tensor.empty() : tensor<1x32x40x128xf32> + %inserted_slice_473 = tensor.insert_slice %2049 into %2050[0, 0, 0, 0] [1, 32, 40, 64] [1, 1, 1, 1] : tensor<1x32x40x64xf32> into tensor<1x32x40x128xf32> + %inserted_slice_474 = tensor.insert_slice %extracted_slice_471 into %inserted_slice_473[0, 0, 0, 64] [1, 32, 40, 64] [1, 1, 1, 1] : tensor<1x32x40x64xf32> into tensor<1x32x40x128xf32> + %2051 = tosa.mul %inserted_slice_474, %2040 {shift = 0 : i8} : (tensor<1x32x40x128xf32>, tensor<1x1x40x128xf32>) -> tensor<1x32x40x128xf32> + %2052 = tosa.add %2047, %2051 : (tensor<1x32x40x128xf32>, tensor<1x32x40x128xf32>) -> tensor<1x32x40x128xf32> + %2053 = tosa.reshape %19 {new_shape = array} : (tensor<40x41xf32>) -> tensor<1x40x41xf32> + %2054 = tosa.reshape %2053 {new_shape = array} : (tensor<1x40x41xf32>) -> tensor<1x1x40x41xf32> + %extracted_slice_475 = tensor.extract_slice %2054[0, 0, 0, 0] [1, 1, 40, 41] [1, 1, 1, 1] : tensor<1x1x40x41xf32> to tensor<1x1x40x41xf32> + %extracted_slice_476 = tensor.extract_slice %extracted_slice_475[0, 0, 0, 0] [1, 1, 40, 41] [1, 1, 1, 1] : tensor<1x1x40x41xf32> to tensor<1x1x40x41xf32> + %2055 = "tosa.const"() <{value = dense<0.000000e+00> : tensor<1x1x40x41xf32>}> : () -> tensor<1x1x40x41xf32> + %2056 = tosa.add %extracted_slice_476, %2055 : (tensor<1x1x40x41xf32>, tensor<1x1x40x41xf32>) -> tensor<1x1x40x41xf32> + %extracted_slice_477 = tensor.extract_slice %2056[0, 0, 0, 0] [1, 1, 40, 41] [1, 1, 1, 1] : tensor<1x1x40x41xf32> to tensor<1x1x40x41xf32> + %extracted_slice_478 = tensor.extract_slice 
%extracted_slice_477[0, 0, 0, 0] [1, 1, 40, 41] [1, 1, 1, 1] : tensor<1x1x40x41xf32> to tensor<1x1x40x41xf32> + %extracted_slice_479 = tensor.extract_slice %extracted_slice_478[0, 0, 0, 0] [1, 1, 40, 41] [1, 1, 1, 1] : tensor<1x1x40x41xf32> to tensor<1x1x40x41xf32> + %extracted_slice_480 = tensor.extract_slice %extracted_slice_479[0, 0, 0, 0] [1, 1, 40, 40] [1, 1, 1, 1] : tensor<1x1x40x41xf32> to tensor<1x1x40x40xf32> + %cst_481 = arith.constant 0.000000e+00 : f32 + %splat_482 = tensor.splat %cst_481 : tensor<40x40xf32> + %2057 = tosa.reshape %extracted_slice_480 {new_shape = array} : (tensor<1x1x40x40xf32>) -> tensor<40x40xf32> + %2058 = tosa.add %splat_482, %2057 : (tensor<40x40xf32>, tensor<40x40xf32>) -> tensor<40x40xf32> + %2059 = "tosa.const"() <{value = dense<[0, 1, 3, 2]> : tensor<4xi32>}> : () -> tensor<4xi32> + %2060 = tosa.transpose %2052, %2059 : (tensor<1x32x40x128xf32>, tensor<4xi32>) -> tensor<1x32x128x40xf32> + %2061 = tosa.reshape %2046 {new_shape = array} : (tensor<1x32x40x128xf32>) -> tensor<32x40x128xf32> + %2062 = tosa.reshape %2060 {new_shape = array} : (tensor<1x32x128x40xf32>) -> tensor<32x128x40xf32> + %2063 = tosa.matmul %2061, %2062 : (tensor<32x40x128xf32>, tensor<32x128x40xf32>) -> tensor<32x40x40xf32> + %cst_483 = arith.constant 0.0883883461 : f32 + %splat_484 = tensor.splat %cst_483 : tensor<32x40x40xf32> + %2064 = tosa.mul %2063, %splat_484 {shift = 0 : i8} : (tensor<32x40x40xf32>, tensor<32x40x40xf32>) -> tensor<32x40x40xf32> + %2065 = tosa.add %2064, %2058 : (tensor<32x40x40xf32>, tensor<40x40xf32>) -> tensor<32x40x40xf32> + %2066 = tosa.reduce_max %2065 {axis = 2 : i32} : (tensor<32x40x40xf32>) -> tensor<32x40x1xf32> + %2067 = tosa.sub %2065, %2066 : (tensor<32x40x40xf32>, tensor<32x40x1xf32>) -> tensor<32x40x40xf32> + %2068 = math.exp %2067 : tensor<32x40x40xf32> + %2069 = tosa.reduce_sum %2068 {axis = 2 : i32} : (tensor<32x40x40xf32>) -> tensor<32x40x1xf32> + %2070 = tosa.log %2069 : (tensor<32x40x1xf32>) -> tensor<32x40x1xf32> 
+ %2071 = tosa.add %2066, %2070 : (tensor<32x40x1xf32>, tensor<32x40x1xf32>) -> tensor<32x40x1xf32> + %2072 = tosa.sub %2065, %2071 : (tensor<32x40x40xf32>, tensor<32x40x1xf32>) -> tensor<32x40x40xf32> + %2073 = math.exp %2072 : tensor<32x40x40xf32> + %2074 = tosa.reshape %2071 {new_shape = array} : (tensor<32x40x1xf32>) -> tensor<1x32x40xf32> + %2075 = tosa.reshape %2038 {new_shape = array} : (tensor<1x32x40x128xf32>) -> tensor<32x40x128xf32> + %2076 = tosa.matmul %2073, %2075 : (tensor<32x40x40xf32>, tensor<32x40x128xf32>) -> tensor<32x40x128xf32> + %2077 = tosa.reshape %2076 {new_shape = array} : (tensor<32x40x128xf32>) -> tensor<1x32x40x128xf32> + %2078 = "tosa.const"() <{value = dense<[0, 2, 1, 3]> : tensor<4xi32>}> : () -> tensor<4xi32> + %2079 = tosa.transpose %2077, %2078 : (tensor<1x32x40x128xf32>, tensor<4xi32>) -> tensor<1x40x32x128xf32> + %2080 = tosa.reshape %2079 {new_shape = array} : (tensor<1x40x32x128xf32>) -> tensor<1x40x4096xf32> + %2081 = "tosa.const"() <{value = dense<[1, 0]> : tensor<2xi32>}> : () -> tensor<2xi32> + %2082 = tosa.transpose %arg160, %2081 : (tensor<4096x4096xf32>, tensor<2xi32>) -> tensor<4096x4096xf32> + %2083 = tosa.reshape %2080 {new_shape = array} : (tensor<1x40x4096xf32>) -> tensor<40x4096xf32> + %cst_485 = arith.constant dense<0.000000e+00> : tensor<40x4096xf32> + %2084 = linalg.matmul {cast = #linalg.type_fn} ins(%2083, %2082 : tensor<40x4096xf32>, tensor<4096x4096xf32>) outs(%cst_485 : tensor<40x4096xf32>) -> tensor<40x4096xf32> + %2085 = tosa.reshape %2084 {new_shape = array} : (tensor<40x4096xf32>) -> tensor<1x40x4096xf32> + %2086 = tosa.add %2002, %2085 : (tensor<1x40x4096xf32>, tensor<1x40x4096xf32>) -> tensor<1x40x4096xf32> + %2087 = tensor.empty() : tensor<1x40x4096xf32> + %c2_i32_486 = arith.constant 2 : i32 + %2088 = linalg.generic {indexing_maps = [#map4, #map4], iterator_types = ["parallel", "parallel", "parallel"]} ins(%2086 : tensor<1x40x4096xf32>) outs(%2087 : tensor<1x40x4096xf32>) { + ^bb0(%in: f32, %out: 
f32): + %3745 = math.fpowi %in, %c2_i32_486 : f32, i32 + linalg.yield %3745 : f32 + } -> tensor<1x40x4096xf32> + %2089 = tosa.reduce_sum %2088 {axis = 2 : i32} : (tensor<1x40x4096xf32>) -> tensor<1x40x1xf32> + %2090 = "tosa.const"() <{value = dense<4.096000e+03> : tensor<1xf32>}> : () -> tensor<1xf32> + %2091 = tosa.reciprocal %2090 : (tensor<1xf32>) -> tensor<1xf32> + %2092 = tosa.mul %2091, %2089 {shift = 0 : i8} : (tensor<1xf32>, tensor<1x40x1xf32>) -> tensor<1x40x1xf32> + %2093 = "tosa.const"() <{value = dense<9.99999974E-6> : tensor<1x40x1xf32>}> : () -> tensor<1x40x1xf32> + %2094 = tosa.add %2092, %2093 : (tensor<1x40x1xf32>, tensor<1x40x1xf32>) -> tensor<1x40x1xf32> + %2095 = tosa.rsqrt %2094 : (tensor<1x40x1xf32>) -> tensor<1x40x1xf32> + %2096 = tosa.mul %2086, %2095 {shift = 0 : i8} : (tensor<1x40x4096xf32>, tensor<1x40x1xf32>) -> tensor<1x40x4096xf32> + %2097 = tosa.reshape %arg161 {new_shape = array} : (tensor<4096xf32>) -> tensor<1x1x4096xf32> + %2098 = tosa.mul %2097, %2096 {shift = 0 : i8} : (tensor<1x1x4096xf32>, tensor<1x40x4096xf32>) -> tensor<1x40x4096xf32> + %2099 = "tosa.const"() <{value = dense<[1, 0]> : tensor<2xi32>}> : () -> tensor<2xi32> + %2100 = tosa.transpose %arg162, %2099 : (tensor<11008x4096xf32>, tensor<2xi32>) -> tensor<4096x11008xf32> + %2101 = tosa.reshape %2098 {new_shape = array} : (tensor<1x40x4096xf32>) -> tensor<40x4096xf32> + %cst_487 = arith.constant dense<0.000000e+00> : tensor<40x11008xf32> + %2102 = linalg.matmul {cast = #linalg.type_fn} ins(%2101, %2100 : tensor<40x4096xf32>, tensor<4096x11008xf32>) outs(%cst_487 : tensor<40x11008xf32>) -> tensor<40x11008xf32> + %2103 = tosa.reshape %2102 {new_shape = array} : (tensor<40x11008xf32>) -> tensor<1x40x11008xf32> + %2104 = tosa.sigmoid %2103 : (tensor<1x40x11008xf32>) -> tensor<1x40x11008xf32> + %2105 = tosa.mul %2103, %2104 {shift = 0 : i8} : (tensor<1x40x11008xf32>, tensor<1x40x11008xf32>) -> tensor<1x40x11008xf32> + %2106 = "tosa.const"() <{value = dense<[1, 0]> : 
tensor<2xi32>}> : () -> tensor<2xi32> + %2107 = tosa.transpose %arg163, %2106 : (tensor<11008x4096xf32>, tensor<2xi32>) -> tensor<4096x11008xf32> + %2108 = tosa.reshape %2098 {new_shape = array} : (tensor<1x40x4096xf32>) -> tensor<40x4096xf32> + %cst_488 = arith.constant dense<0.000000e+00> : tensor<40x11008xf32> + %2109 = linalg.matmul {cast = #linalg.type_fn} ins(%2108, %2107 : tensor<40x4096xf32>, tensor<4096x11008xf32>) outs(%cst_488 : tensor<40x11008xf32>) -> tensor<40x11008xf32> + %2110 = tosa.reshape %2109 {new_shape = array} : (tensor<40x11008xf32>) -> tensor<1x40x11008xf32> + %2111 = tosa.mul %2105, %2110 {shift = 0 : i8} : (tensor<1x40x11008xf32>, tensor<1x40x11008xf32>) -> tensor<1x40x11008xf32> + %2112 = "tosa.const"() <{value = dense<[1, 0]> : tensor<2xi32>}> : () -> tensor<2xi32> + %2113 = tosa.transpose %arg164, %2112 : (tensor<4096x11008xf32>, tensor<2xi32>) -> tensor<11008x4096xf32> + %2114 = tosa.reshape %2111 {new_shape = array} : (tensor<1x40x11008xf32>) -> tensor<40x11008xf32> + %cst_489 = arith.constant dense<0.000000e+00> : tensor<40x4096xf32> + %2115 = linalg.matmul {cast = #linalg.type_fn} ins(%2114, %2113 : tensor<40x11008xf32>, tensor<11008x4096xf32>) outs(%cst_489 : tensor<40x4096xf32>) -> tensor<40x4096xf32> + %2116 = tosa.reshape %2115 {new_shape = array} : (tensor<40x4096xf32>) -> tensor<1x40x4096xf32> + %2117 = tosa.add %2086, %2116 : (tensor<1x40x4096xf32>, tensor<1x40x4096xf32>) -> tensor<1x40x4096xf32> + %2118 = tensor.empty() : tensor<1x40x4096xf32> + %c2_i32_490 = arith.constant 2 : i32 + %2119 = linalg.generic {indexing_maps = [#map4, #map4], iterator_types = ["parallel", "parallel", "parallel"]} ins(%2117 : tensor<1x40x4096xf32>) outs(%2118 : tensor<1x40x4096xf32>) { + ^bb0(%in: f32, %out: f32): + %3745 = math.fpowi %in, %c2_i32_490 : f32, i32 + linalg.yield %3745 : f32 + } -> tensor<1x40x4096xf32> + %2120 = tosa.reduce_sum %2119 {axis = 2 : i32} : (tensor<1x40x4096xf32>) -> tensor<1x40x1xf32> + %2121 = "tosa.const"() <{value 
= dense<4.096000e+03> : tensor<1xf32>}> : () -> tensor<1xf32> + %2122 = tosa.reciprocal %2121 : (tensor<1xf32>) -> tensor<1xf32> + %2123 = tosa.mul %2122, %2120 {shift = 0 : i8} : (tensor<1xf32>, tensor<1x40x1xf32>) -> tensor<1x40x1xf32> + %2124 = "tosa.const"() <{value = dense<9.99999974E-6> : tensor<1x40x1xf32>}> : () -> tensor<1x40x1xf32> + %2125 = tosa.add %2123, %2124 : (tensor<1x40x1xf32>, tensor<1x40x1xf32>) -> tensor<1x40x1xf32> + %2126 = tosa.rsqrt %2125 : (tensor<1x40x1xf32>) -> tensor<1x40x1xf32> + %2127 = tosa.mul %2117, %2126 {shift = 0 : i8} : (tensor<1x40x4096xf32>, tensor<1x40x1xf32>) -> tensor<1x40x4096xf32> + %2128 = tosa.reshape %arg165 {new_shape = array} : (tensor<4096xf32>) -> tensor<1x1x4096xf32> + %2129 = tosa.mul %2128, %2127 {shift = 0 : i8} : (tensor<1x1x4096xf32>, tensor<1x40x4096xf32>) -> tensor<1x40x4096xf32> + %2130 = "tosa.const"() <{value = dense<[1, 0]> : tensor<2xi32>}> : () -> tensor<2xi32> + %2131 = tosa.transpose %arg166, %2130 : (tensor<4096x4096xf32>, tensor<2xi32>) -> tensor<4096x4096xf32> + %2132 = tosa.reshape %2129 {new_shape = array} : (tensor<1x40x4096xf32>) -> tensor<40x4096xf32> + %cst_491 = arith.constant dense<0.000000e+00> : tensor<40x4096xf32> + %2133 = linalg.matmul {cast = #linalg.type_fn} ins(%2132, %2131 : tensor<40x4096xf32>, tensor<4096x4096xf32>) outs(%cst_491 : tensor<40x4096xf32>) -> tensor<40x4096xf32> + %2134 = tosa.reshape %2133 {new_shape = array} : (tensor<40x4096xf32>) -> tensor<1x40x4096xf32> + %2135 = "tosa.const"() <{value = dense<[1, 0]> : tensor<2xi32>}> : () -> tensor<2xi32> + %2136 = tosa.transpose %arg167, %2135 : (tensor<4096x4096xf32>, tensor<2xi32>) -> tensor<4096x4096xf32> + %2137 = tosa.reshape %2129 {new_shape = array} : (tensor<1x40x4096xf32>) -> tensor<40x4096xf32> + %cst_492 = arith.constant dense<0.000000e+00> : tensor<40x4096xf32> + %2138 = linalg.matmul {cast = #linalg.type_fn} ins(%2137, %2136 : tensor<40x4096xf32>, tensor<4096x4096xf32>) outs(%cst_492 : tensor<40x4096xf32>) -> 
tensor<40x4096xf32> + %2139 = tosa.reshape %2138 {new_shape = array} : (tensor<40x4096xf32>) -> tensor<1x40x4096xf32> + %2140 = "tosa.const"() <{value = dense<[1, 0]> : tensor<2xi32>}> : () -> tensor<2xi32> + %2141 = tosa.transpose %arg168, %2140 : (tensor<4096x4096xf32>, tensor<2xi32>) -> tensor<4096x4096xf32> + %2142 = tosa.reshape %2129 {new_shape = array} : (tensor<1x40x4096xf32>) -> tensor<40x4096xf32> + %cst_493 = arith.constant dense<0.000000e+00> : tensor<40x4096xf32> + %2143 = linalg.matmul {cast = #linalg.type_fn} ins(%2142, %2141 : tensor<40x4096xf32>, tensor<4096x4096xf32>) outs(%cst_493 : tensor<40x4096xf32>) -> tensor<40x4096xf32> + %2144 = tosa.reshape %2143 {new_shape = array} : (tensor<40x4096xf32>) -> tensor<1x40x4096xf32> + %2145 = tosa.reshape %2134 {new_shape = array} : (tensor<1x40x4096xf32>) -> tensor<1x40x32x128xf32> + %2146 = "tosa.const"() <{value = dense<[0, 2, 1, 3]> : tensor<4xi32>}> : () -> tensor<4xi32> + %2147 = tosa.transpose %2145, %2146 : (tensor<1x40x32x128xf32>, tensor<4xi32>) -> tensor<1x32x40x128xf32> + %2148 = tosa.reshape %2139 {new_shape = array} : (tensor<1x40x4096xf32>) -> tensor<1x40x32x128xf32> + %2149 = "tosa.const"() <{value = dense<[0, 2, 1, 3]> : tensor<4xi32>}> : () -> tensor<4xi32> + %2150 = tosa.transpose %2148, %2149 : (tensor<1x40x32x128xf32>, tensor<4xi32>) -> tensor<1x32x40x128xf32> + %2151 = tosa.reshape %2144 {new_shape = array} : (tensor<1x40x4096xf32>) -> tensor<1x40x32x128xf32> + %2152 = "tosa.const"() <{value = dense<[0, 2, 1, 3]> : tensor<4xi32>}> : () -> tensor<4xi32> + %2153 = tosa.transpose %2151, %2152 : (tensor<1x40x32x128xf32>, tensor<4xi32>) -> tensor<1x32x40x128xf32> + %2154 = tosa.reshape %45 {new_shape = array} : (tensor<1x40x128xf32>) -> tensor<1x1x40x128xf32> + %2155 = tosa.reshape %47 {new_shape = array} : (tensor<1x40x128xf32>) -> tensor<1x1x40x128xf32> + %2156 = tosa.mul %2147, %2154 {shift = 0 : i8} : (tensor<1x32x40x128xf32>, tensor<1x1x40x128xf32>) -> tensor<1x32x40x128xf32> + 
%extracted_slice_494 = tensor.extract_slice %2147[0, 0, 0, 0] [1, 32, 40, 64] [1, 1, 1, 1] : tensor<1x32x40x128xf32> to tensor<1x32x40x64xf32> + %extracted_slice_495 = tensor.extract_slice %2147[0, 0, 0, 64] [1, 32, 40, 64] [1, 1, 1, 1] : tensor<1x32x40x128xf32> to tensor<1x32x40x64xf32> + %2157 = tensor.empty() : tensor<1x32x40x64xf32> + %2158 = linalg.negf ins(%extracted_slice_495 : tensor<1x32x40x64xf32>) outs(%2157 : tensor<1x32x40x64xf32>) -> tensor<1x32x40x64xf32> + %2159 = tensor.empty() : tensor<1x32x40x128xf32> + %inserted_slice_496 = tensor.insert_slice %2158 into %2159[0, 0, 0, 0] [1, 32, 40, 64] [1, 1, 1, 1] : tensor<1x32x40x64xf32> into tensor<1x32x40x128xf32> + %inserted_slice_497 = tensor.insert_slice %extracted_slice_494 into %inserted_slice_496[0, 0, 0, 64] [1, 32, 40, 64] [1, 1, 1, 1] : tensor<1x32x40x64xf32> into tensor<1x32x40x128xf32> + %2160 = tosa.mul %inserted_slice_497, %2155 {shift = 0 : i8} : (tensor<1x32x40x128xf32>, tensor<1x1x40x128xf32>) -> tensor<1x32x40x128xf32> + %2161 = tosa.add %2156, %2160 : (tensor<1x32x40x128xf32>, tensor<1x32x40x128xf32>) -> tensor<1x32x40x128xf32> + %2162 = tosa.mul %2150, %2154 {shift = 0 : i8} : (tensor<1x32x40x128xf32>, tensor<1x1x40x128xf32>) -> tensor<1x32x40x128xf32> + %extracted_slice_498 = tensor.extract_slice %2150[0, 0, 0, 0] [1, 32, 40, 64] [1, 1, 1, 1] : tensor<1x32x40x128xf32> to tensor<1x32x40x64xf32> + %extracted_slice_499 = tensor.extract_slice %2150[0, 0, 0, 64] [1, 32, 40, 64] [1, 1, 1, 1] : tensor<1x32x40x128xf32> to tensor<1x32x40x64xf32> + %2163 = tensor.empty() : tensor<1x32x40x64xf32> + %2164 = linalg.negf ins(%extracted_slice_499 : tensor<1x32x40x64xf32>) outs(%2163 : tensor<1x32x40x64xf32>) -> tensor<1x32x40x64xf32> + %2165 = tensor.empty() : tensor<1x32x40x128xf32> + %inserted_slice_500 = tensor.insert_slice %2164 into %2165[0, 0, 0, 0] [1, 32, 40, 64] [1, 1, 1, 1] : tensor<1x32x40x64xf32> into tensor<1x32x40x128xf32> + %inserted_slice_501 = tensor.insert_slice %extracted_slice_498 
into %inserted_slice_500[0, 0, 0, 64] [1, 32, 40, 64] [1, 1, 1, 1] : tensor<1x32x40x64xf32> into tensor<1x32x40x128xf32> + %2166 = tosa.mul %inserted_slice_501, %2155 {shift = 0 : i8} : (tensor<1x32x40x128xf32>, tensor<1x1x40x128xf32>) -> tensor<1x32x40x128xf32> + %2167 = tosa.add %2162, %2166 : (tensor<1x32x40x128xf32>, tensor<1x32x40x128xf32>) -> tensor<1x32x40x128xf32> + %2168 = tosa.reshape %19 {new_shape = array} : (tensor<40x41xf32>) -> tensor<1x40x41xf32> + %2169 = tosa.reshape %2168 {new_shape = array} : (tensor<1x40x41xf32>) -> tensor<1x1x40x41xf32> + %extracted_slice_502 = tensor.extract_slice %2169[0, 0, 0, 0] [1, 1, 40, 41] [1, 1, 1, 1] : tensor<1x1x40x41xf32> to tensor<1x1x40x41xf32> + %extracted_slice_503 = tensor.extract_slice %extracted_slice_502[0, 0, 0, 0] [1, 1, 40, 41] [1, 1, 1, 1] : tensor<1x1x40x41xf32> to tensor<1x1x40x41xf32> + %2170 = "tosa.const"() <{value = dense<0.000000e+00> : tensor<1x1x40x41xf32>}> : () -> tensor<1x1x40x41xf32> + %2171 = tosa.add %extracted_slice_503, %2170 : (tensor<1x1x40x41xf32>, tensor<1x1x40x41xf32>) -> tensor<1x1x40x41xf32> + %extracted_slice_504 = tensor.extract_slice %2171[0, 0, 0, 0] [1, 1, 40, 41] [1, 1, 1, 1] : tensor<1x1x40x41xf32> to tensor<1x1x40x41xf32> + %extracted_slice_505 = tensor.extract_slice %extracted_slice_504[0, 0, 0, 0] [1, 1, 40, 41] [1, 1, 1, 1] : tensor<1x1x40x41xf32> to tensor<1x1x40x41xf32> + %extracted_slice_506 = tensor.extract_slice %extracted_slice_505[0, 0, 0, 0] [1, 1, 40, 41] [1, 1, 1, 1] : tensor<1x1x40x41xf32> to tensor<1x1x40x41xf32> + %extracted_slice_507 = tensor.extract_slice %extracted_slice_506[0, 0, 0, 0] [1, 1, 40, 40] [1, 1, 1, 1] : tensor<1x1x40x41xf32> to tensor<1x1x40x40xf32> + %cst_508 = arith.constant 0.000000e+00 : f32 + %splat_509 = tensor.splat %cst_508 : tensor<40x40xf32> + %2172 = tosa.reshape %extracted_slice_507 {new_shape = array} : (tensor<1x1x40x40xf32>) -> tensor<40x40xf32> + %2173 = tosa.add %splat_509, %2172 : (tensor<40x40xf32>, tensor<40x40xf32>) -> 
tensor<40x40xf32> + %2174 = "tosa.const"() <{value = dense<[0, 1, 3, 2]> : tensor<4xi32>}> : () -> tensor<4xi32> + %2175 = tosa.transpose %2167, %2174 : (tensor<1x32x40x128xf32>, tensor<4xi32>) -> tensor<1x32x128x40xf32> + %2176 = tosa.reshape %2161 {new_shape = array} : (tensor<1x32x40x128xf32>) -> tensor<32x40x128xf32> + %2177 = tosa.reshape %2175 {new_shape = array} : (tensor<1x32x128x40xf32>) -> tensor<32x128x40xf32> + %2178 = tosa.matmul %2176, %2177 : (tensor<32x40x128xf32>, tensor<32x128x40xf32>) -> tensor<32x40x40xf32> + %cst_510 = arith.constant 0.0883883461 : f32 + %splat_511 = tensor.splat %cst_510 : tensor<32x40x40xf32> + %2179 = tosa.mul %2178, %splat_511 {shift = 0 : i8} : (tensor<32x40x40xf32>, tensor<32x40x40xf32>) -> tensor<32x40x40xf32> + %2180 = tosa.add %2179, %2173 : (tensor<32x40x40xf32>, tensor<40x40xf32>) -> tensor<32x40x40xf32> + %2181 = tosa.reduce_max %2180 {axis = 2 : i32} : (tensor<32x40x40xf32>) -> tensor<32x40x1xf32> + %2182 = tosa.sub %2180, %2181 : (tensor<32x40x40xf32>, tensor<32x40x1xf32>) -> tensor<32x40x40xf32> + %2183 = math.exp %2182 : tensor<32x40x40xf32> + %2184 = tosa.reduce_sum %2183 {axis = 2 : i32} : (tensor<32x40x40xf32>) -> tensor<32x40x1xf32> + %2185 = tosa.log %2184 : (tensor<32x40x1xf32>) -> tensor<32x40x1xf32> + %2186 = tosa.add %2181, %2185 : (tensor<32x40x1xf32>, tensor<32x40x1xf32>) -> tensor<32x40x1xf32> + %2187 = tosa.sub %2180, %2186 : (tensor<32x40x40xf32>, tensor<32x40x1xf32>) -> tensor<32x40x40xf32> + %2188 = math.exp %2187 : tensor<32x40x40xf32> + %2189 = tosa.reshape %2186 {new_shape = array} : (tensor<32x40x1xf32>) -> tensor<1x32x40xf32> + %2190 = tosa.reshape %2153 {new_shape = array} : (tensor<1x32x40x128xf32>) -> tensor<32x40x128xf32> + %2191 = tosa.matmul %2188, %2190 : (tensor<32x40x40xf32>, tensor<32x40x128xf32>) -> tensor<32x40x128xf32> + %2192 = tosa.reshape %2191 {new_shape = array} : (tensor<32x40x128xf32>) -> tensor<1x32x40x128xf32> + %2193 = "tosa.const"() <{value = dense<[0, 2, 1, 3]> : 
tensor<4xi32>}> : () -> tensor<4xi32> + %2194 = tosa.transpose %2192, %2193 : (tensor<1x32x40x128xf32>, tensor<4xi32>) -> tensor<1x40x32x128xf32> + %2195 = tosa.reshape %2194 {new_shape = array} : (tensor<1x40x32x128xf32>) -> tensor<1x40x4096xf32> + %2196 = "tosa.const"() <{value = dense<[1, 0]> : tensor<2xi32>}> : () -> tensor<2xi32> + %2197 = tosa.transpose %arg169, %2196 : (tensor<4096x4096xf32>, tensor<2xi32>) -> tensor<4096x4096xf32> + %2198 = tosa.reshape %2195 {new_shape = array} : (tensor<1x40x4096xf32>) -> tensor<40x4096xf32> + %cst_512 = arith.constant dense<0.000000e+00> : tensor<40x4096xf32> + %2199 = linalg.matmul {cast = #linalg.type_fn} ins(%2198, %2197 : tensor<40x4096xf32>, tensor<4096x4096xf32>) outs(%cst_512 : tensor<40x4096xf32>) -> tensor<40x4096xf32> + %2200 = tosa.reshape %2199 {new_shape = array} : (tensor<40x4096xf32>) -> tensor<1x40x4096xf32> + %2201 = tosa.add %2117, %2200 : (tensor<1x40x4096xf32>, tensor<1x40x4096xf32>) -> tensor<1x40x4096xf32> + %2202 = tensor.empty() : tensor<1x40x4096xf32> + %c2_i32_513 = arith.constant 2 : i32 + %2203 = linalg.generic {indexing_maps = [#map4, #map4], iterator_types = ["parallel", "parallel", "parallel"]} ins(%2201 : tensor<1x40x4096xf32>) outs(%2202 : tensor<1x40x4096xf32>) { + ^bb0(%in: f32, %out: f32): + %3745 = math.fpowi %in, %c2_i32_513 : f32, i32 + linalg.yield %3745 : f32 + } -> tensor<1x40x4096xf32> + %2204 = tosa.reduce_sum %2203 {axis = 2 : i32} : (tensor<1x40x4096xf32>) -> tensor<1x40x1xf32> + %2205 = "tosa.const"() <{value = dense<4.096000e+03> : tensor<1xf32>}> : () -> tensor<1xf32> + %2206 = tosa.reciprocal %2205 : (tensor<1xf32>) -> tensor<1xf32> + %2207 = tosa.mul %2206, %2204 {shift = 0 : i8} : (tensor<1xf32>, tensor<1x40x1xf32>) -> tensor<1x40x1xf32> + %2208 = "tosa.const"() <{value = dense<9.99999974E-6> : tensor<1x40x1xf32>}> : () -> tensor<1x40x1xf32> + %2209 = tosa.add %2207, %2208 : (tensor<1x40x1xf32>, tensor<1x40x1xf32>) -> tensor<1x40x1xf32> + %2210 = tosa.rsqrt %2209 : 
(tensor<1x40x1xf32>) -> tensor<1x40x1xf32> + %2211 = tosa.mul %2201, %2210 {shift = 0 : i8} : (tensor<1x40x4096xf32>, tensor<1x40x1xf32>) -> tensor<1x40x4096xf32> + %2212 = tosa.reshape %arg170 {new_shape = array} : (tensor<4096xf32>) -> tensor<1x1x4096xf32> + %2213 = tosa.mul %2212, %2211 {shift = 0 : i8} : (tensor<1x1x4096xf32>, tensor<1x40x4096xf32>) -> tensor<1x40x4096xf32> + %2214 = "tosa.const"() <{value = dense<[1, 0]> : tensor<2xi32>}> : () -> tensor<2xi32> + %2215 = tosa.transpose %arg171, %2214 : (tensor<11008x4096xf32>, tensor<2xi32>) -> tensor<4096x11008xf32> + %2216 = tosa.reshape %2213 {new_shape = array} : (tensor<1x40x4096xf32>) -> tensor<40x4096xf32> + %cst_514 = arith.constant dense<0.000000e+00> : tensor<40x11008xf32> + %2217 = linalg.matmul {cast = #linalg.type_fn} ins(%2216, %2215 : tensor<40x4096xf32>, tensor<4096x11008xf32>) outs(%cst_514 : tensor<40x11008xf32>) -> tensor<40x11008xf32> + %2218 = tosa.reshape %2217 {new_shape = array} : (tensor<40x11008xf32>) -> tensor<1x40x11008xf32> + %2219 = tosa.sigmoid %2218 : (tensor<1x40x11008xf32>) -> tensor<1x40x11008xf32> + %2220 = tosa.mul %2218, %2219 {shift = 0 : i8} : (tensor<1x40x11008xf32>, tensor<1x40x11008xf32>) -> tensor<1x40x11008xf32> + %2221 = "tosa.const"() <{value = dense<[1, 0]> : tensor<2xi32>}> : () -> tensor<2xi32> + %2222 = tosa.transpose %arg172, %2221 : (tensor<11008x4096xf32>, tensor<2xi32>) -> tensor<4096x11008xf32> + %2223 = tosa.reshape %2213 {new_shape = array} : (tensor<1x40x4096xf32>) -> tensor<40x4096xf32> + %cst_515 = arith.constant dense<0.000000e+00> : tensor<40x11008xf32> + %2224 = linalg.matmul {cast = #linalg.type_fn} ins(%2223, %2222 : tensor<40x4096xf32>, tensor<4096x11008xf32>) outs(%cst_515 : tensor<40x11008xf32>) -> tensor<40x11008xf32> + %2225 = tosa.reshape %2224 {new_shape = array} : (tensor<40x11008xf32>) -> tensor<1x40x11008xf32> + %2226 = tosa.mul %2220, %2225 {shift = 0 : i8} : (tensor<1x40x11008xf32>, tensor<1x40x11008xf32>) -> tensor<1x40x11008xf32> + 
%2227 = "tosa.const"() <{value = dense<[1, 0]> : tensor<2xi32>}> : () -> tensor<2xi32> + %2228 = tosa.transpose %arg173, %2227 : (tensor<4096x11008xf32>, tensor<2xi32>) -> tensor<11008x4096xf32> + %2229 = tosa.reshape %2226 {new_shape = array} : (tensor<1x40x11008xf32>) -> tensor<40x11008xf32> + %cst_516 = arith.constant dense<0.000000e+00> : tensor<40x4096xf32> + %2230 = linalg.matmul {cast = #linalg.type_fn} ins(%2229, %2228 : tensor<40x11008xf32>, tensor<11008x4096xf32>) outs(%cst_516 : tensor<40x4096xf32>) -> tensor<40x4096xf32> + %2231 = tosa.reshape %2230 {new_shape = array} : (tensor<40x4096xf32>) -> tensor<1x40x4096xf32> + %2232 = tosa.add %2201, %2231 : (tensor<1x40x4096xf32>, tensor<1x40x4096xf32>) -> tensor<1x40x4096xf32> + %2233 = tensor.empty() : tensor<1x40x4096xf32> + %c2_i32_517 = arith.constant 2 : i32 + %2234 = linalg.generic {indexing_maps = [#map4, #map4], iterator_types = ["parallel", "parallel", "parallel"]} ins(%2232 : tensor<1x40x4096xf32>) outs(%2233 : tensor<1x40x4096xf32>) { + ^bb0(%in: f32, %out: f32): + %3745 = math.fpowi %in, %c2_i32_517 : f32, i32 + linalg.yield %3745 : f32 + } -> tensor<1x40x4096xf32> + %2235 = tosa.reduce_sum %2234 {axis = 2 : i32} : (tensor<1x40x4096xf32>) -> tensor<1x40x1xf32> + %2236 = "tosa.const"() <{value = dense<4.096000e+03> : tensor<1xf32>}> : () -> tensor<1xf32> + %2237 = tosa.reciprocal %2236 : (tensor<1xf32>) -> tensor<1xf32> + %2238 = tosa.mul %2237, %2235 {shift = 0 : i8} : (tensor<1xf32>, tensor<1x40x1xf32>) -> tensor<1x40x1xf32> + %2239 = "tosa.const"() <{value = dense<9.99999974E-6> : tensor<1x40x1xf32>}> : () -> tensor<1x40x1xf32> + %2240 = tosa.add %2238, %2239 : (tensor<1x40x1xf32>, tensor<1x40x1xf32>) -> tensor<1x40x1xf32> + %2241 = tosa.rsqrt %2240 : (tensor<1x40x1xf32>) -> tensor<1x40x1xf32> + %2242 = tosa.mul %2232, %2241 {shift = 0 : i8} : (tensor<1x40x4096xf32>, tensor<1x40x1xf32>) -> tensor<1x40x4096xf32> + %2243 = tosa.reshape %arg174 {new_shape = array} : (tensor<4096xf32>) -> 
tensor<1x1x4096xf32> + %2244 = tosa.mul %2243, %2242 {shift = 0 : i8} : (tensor<1x1x4096xf32>, tensor<1x40x4096xf32>) -> tensor<1x40x4096xf32> + %2245 = "tosa.const"() <{value = dense<[1, 0]> : tensor<2xi32>}> : () -> tensor<2xi32> + %2246 = tosa.transpose %arg175, %2245 : (tensor<4096x4096xf32>, tensor<2xi32>) -> tensor<4096x4096xf32> + %2247 = tosa.reshape %2244 {new_shape = array} : (tensor<1x40x4096xf32>) -> tensor<40x4096xf32> + %cst_518 = arith.constant dense<0.000000e+00> : tensor<40x4096xf32> + %2248 = linalg.matmul {cast = #linalg.type_fn} ins(%2247, %2246 : tensor<40x4096xf32>, tensor<4096x4096xf32>) outs(%cst_518 : tensor<40x4096xf32>) -> tensor<40x4096xf32> + %2249 = tosa.reshape %2248 {new_shape = array} : (tensor<40x4096xf32>) -> tensor<1x40x4096xf32> + %2250 = "tosa.const"() <{value = dense<[1, 0]> : tensor<2xi32>}> : () -> tensor<2xi32> + %2251 = tosa.transpose %arg176, %2250 : (tensor<4096x4096xf32>, tensor<2xi32>) -> tensor<4096x4096xf32> + %2252 = tosa.reshape %2244 {new_shape = array} : (tensor<1x40x4096xf32>) -> tensor<40x4096xf32> + %cst_519 = arith.constant dense<0.000000e+00> : tensor<40x4096xf32> + %2253 = linalg.matmul {cast = #linalg.type_fn} ins(%2252, %2251 : tensor<40x4096xf32>, tensor<4096x4096xf32>) outs(%cst_519 : tensor<40x4096xf32>) -> tensor<40x4096xf32> + %2254 = tosa.reshape %2253 {new_shape = array} : (tensor<40x4096xf32>) -> tensor<1x40x4096xf32> + %2255 = "tosa.const"() <{value = dense<[1, 0]> : tensor<2xi32>}> : () -> tensor<2xi32> + %2256 = tosa.transpose %arg177, %2255 : (tensor<4096x4096xf32>, tensor<2xi32>) -> tensor<4096x4096xf32> + %2257 = tosa.reshape %2244 {new_shape = array} : (tensor<1x40x4096xf32>) -> tensor<40x4096xf32> + %cst_520 = arith.constant dense<0.000000e+00> : tensor<40x4096xf32> + %2258 = linalg.matmul {cast = #linalg.type_fn} ins(%2257, %2256 : tensor<40x4096xf32>, tensor<4096x4096xf32>) outs(%cst_520 : tensor<40x4096xf32>) -> tensor<40x4096xf32> + %2259 = tosa.reshape %2258 {new_shape = array} : 
(tensor<40x4096xf32>) -> tensor<1x40x4096xf32> + %2260 = tosa.reshape %2249 {new_shape = array} : (tensor<1x40x4096xf32>) -> tensor<1x40x32x128xf32> + %2261 = "tosa.const"() <{value = dense<[0, 2, 1, 3]> : tensor<4xi32>}> : () -> tensor<4xi32> + %2262 = tosa.transpose %2260, %2261 : (tensor<1x40x32x128xf32>, tensor<4xi32>) -> tensor<1x32x40x128xf32> + %2263 = tosa.reshape %2254 {new_shape = array} : (tensor<1x40x4096xf32>) -> tensor<1x40x32x128xf32> + %2264 = "tosa.const"() <{value = dense<[0, 2, 1, 3]> : tensor<4xi32>}> : () -> tensor<4xi32> + %2265 = tosa.transpose %2263, %2264 : (tensor<1x40x32x128xf32>, tensor<4xi32>) -> tensor<1x32x40x128xf32> + %2266 = tosa.reshape %2259 {new_shape = array} : (tensor<1x40x4096xf32>) -> tensor<1x40x32x128xf32> + %2267 = "tosa.const"() <{value = dense<[0, 2, 1, 3]> : tensor<4xi32>}> : () -> tensor<4xi32> + %2268 = tosa.transpose %2266, %2267 : (tensor<1x40x32x128xf32>, tensor<4xi32>) -> tensor<1x32x40x128xf32> + %2269 = tosa.reshape %45 {new_shape = array} : (tensor<1x40x128xf32>) -> tensor<1x1x40x128xf32> + %2270 = tosa.reshape %47 {new_shape = array} : (tensor<1x40x128xf32>) -> tensor<1x1x40x128xf32> + %2271 = tosa.mul %2262, %2269 {shift = 0 : i8} : (tensor<1x32x40x128xf32>, tensor<1x1x40x128xf32>) -> tensor<1x32x40x128xf32> + %extracted_slice_521 = tensor.extract_slice %2262[0, 0, 0, 0] [1, 32, 40, 64] [1, 1, 1, 1] : tensor<1x32x40x128xf32> to tensor<1x32x40x64xf32> + %extracted_slice_522 = tensor.extract_slice %2262[0, 0, 0, 64] [1, 32, 40, 64] [1, 1, 1, 1] : tensor<1x32x40x128xf32> to tensor<1x32x40x64xf32> + %2272 = tensor.empty() : tensor<1x32x40x64xf32> + %2273 = linalg.negf ins(%extracted_slice_522 : tensor<1x32x40x64xf32>) outs(%2272 : tensor<1x32x40x64xf32>) -> tensor<1x32x40x64xf32> + %2274 = tensor.empty() : tensor<1x32x40x128xf32> + %inserted_slice_523 = tensor.insert_slice %2273 into %2274[0, 0, 0, 0] [1, 32, 40, 64] [1, 1, 1, 1] : tensor<1x32x40x64xf32> into tensor<1x32x40x128xf32> + %inserted_slice_524 = 
tensor.insert_slice %extracted_slice_521 into %inserted_slice_523[0, 0, 0, 64] [1, 32, 40, 64] [1, 1, 1, 1] : tensor<1x32x40x64xf32> into tensor<1x32x40x128xf32> + %2275 = tosa.mul %inserted_slice_524, %2270 {shift = 0 : i8} : (tensor<1x32x40x128xf32>, tensor<1x1x40x128xf32>) -> tensor<1x32x40x128xf32> + %2276 = tosa.add %2271, %2275 : (tensor<1x32x40x128xf32>, tensor<1x32x40x128xf32>) -> tensor<1x32x40x128xf32> + %2277 = tosa.mul %2265, %2269 {shift = 0 : i8} : (tensor<1x32x40x128xf32>, tensor<1x1x40x128xf32>) -> tensor<1x32x40x128xf32> + %extracted_slice_525 = tensor.extract_slice %2265[0, 0, 0, 0] [1, 32, 40, 64] [1, 1, 1, 1] : tensor<1x32x40x128xf32> to tensor<1x32x40x64xf32> + %extracted_slice_526 = tensor.extract_slice %2265[0, 0, 0, 64] [1, 32, 40, 64] [1, 1, 1, 1] : tensor<1x32x40x128xf32> to tensor<1x32x40x64xf32> + %2278 = tensor.empty() : tensor<1x32x40x64xf32> + %2279 = linalg.negf ins(%extracted_slice_526 : tensor<1x32x40x64xf32>) outs(%2278 : tensor<1x32x40x64xf32>) -> tensor<1x32x40x64xf32> + %2280 = tensor.empty() : tensor<1x32x40x128xf32> + %inserted_slice_527 = tensor.insert_slice %2279 into %2280[0, 0, 0, 0] [1, 32, 40, 64] [1, 1, 1, 1] : tensor<1x32x40x64xf32> into tensor<1x32x40x128xf32> + %inserted_slice_528 = tensor.insert_slice %extracted_slice_525 into %inserted_slice_527[0, 0, 0, 64] [1, 32, 40, 64] [1, 1, 1, 1] : tensor<1x32x40x64xf32> into tensor<1x32x40x128xf32> + %2281 = tosa.mul %inserted_slice_528, %2270 {shift = 0 : i8} : (tensor<1x32x40x128xf32>, tensor<1x1x40x128xf32>) -> tensor<1x32x40x128xf32> + %2282 = tosa.add %2277, %2281 : (tensor<1x32x40x128xf32>, tensor<1x32x40x128xf32>) -> tensor<1x32x40x128xf32> + %2283 = tosa.reshape %19 {new_shape = array} : (tensor<40x41xf32>) -> tensor<1x40x41xf32> + %2284 = tosa.reshape %2283 {new_shape = array} : (tensor<1x40x41xf32>) -> tensor<1x1x40x41xf32> + %extracted_slice_529 = tensor.extract_slice %2284[0, 0, 0, 0] [1, 1, 40, 41] [1, 1, 1, 1] : tensor<1x1x40x41xf32> to tensor<1x1x40x41xf32> 
+ %extracted_slice_530 = tensor.extract_slice %extracted_slice_529[0, 0, 0, 0] [1, 1, 40, 41] [1, 1, 1, 1] : tensor<1x1x40x41xf32> to tensor<1x1x40x41xf32> + %2285 = "tosa.const"() <{value = dense<0.000000e+00> : tensor<1x1x40x41xf32>}> : () -> tensor<1x1x40x41xf32> + %2286 = tosa.add %extracted_slice_530, %2285 : (tensor<1x1x40x41xf32>, tensor<1x1x40x41xf32>) -> tensor<1x1x40x41xf32> + %extracted_slice_531 = tensor.extract_slice %2286[0, 0, 0, 0] [1, 1, 40, 41] [1, 1, 1, 1] : tensor<1x1x40x41xf32> to tensor<1x1x40x41xf32> + %extracted_slice_532 = tensor.extract_slice %extracted_slice_531[0, 0, 0, 0] [1, 1, 40, 41] [1, 1, 1, 1] : tensor<1x1x40x41xf32> to tensor<1x1x40x41xf32> + %extracted_slice_533 = tensor.extract_slice %extracted_slice_532[0, 0, 0, 0] [1, 1, 40, 41] [1, 1, 1, 1] : tensor<1x1x40x41xf32> to tensor<1x1x40x41xf32> + %extracted_slice_534 = tensor.extract_slice %extracted_slice_533[0, 0, 0, 0] [1, 1, 40, 40] [1, 1, 1, 1] : tensor<1x1x40x41xf32> to tensor<1x1x40x40xf32> + %cst_535 = arith.constant 0.000000e+00 : f32 + %splat_536 = tensor.splat %cst_535 : tensor<40x40xf32> + %2287 = tosa.reshape %extracted_slice_534 {new_shape = array} : (tensor<1x1x40x40xf32>) -> tensor<40x40xf32> + %2288 = tosa.add %splat_536, %2287 : (tensor<40x40xf32>, tensor<40x40xf32>) -> tensor<40x40xf32> + %2289 = "tosa.const"() <{value = dense<[0, 1, 3, 2]> : tensor<4xi32>}> : () -> tensor<4xi32> + %2290 = tosa.transpose %2282, %2289 : (tensor<1x32x40x128xf32>, tensor<4xi32>) -> tensor<1x32x128x40xf32> + %2291 = tosa.reshape %2276 {new_shape = array} : (tensor<1x32x40x128xf32>) -> tensor<32x40x128xf32> + %2292 = tosa.reshape %2290 {new_shape = array} : (tensor<1x32x128x40xf32>) -> tensor<32x128x40xf32> + %2293 = tosa.matmul %2291, %2292 : (tensor<32x40x128xf32>, tensor<32x128x40xf32>) -> tensor<32x40x40xf32> + %cst_537 = arith.constant 0.0883883461 : f32 + %splat_538 = tensor.splat %cst_537 : tensor<32x40x40xf32> + %2294 = tosa.mul %2293, %splat_538 {shift = 0 : i8} : 
(tensor<32x40x40xf32>, tensor<32x40x40xf32>) -> tensor<32x40x40xf32> + %2295 = tosa.add %2294, %2288 : (tensor<32x40x40xf32>, tensor<40x40xf32>) -> tensor<32x40x40xf32> + %2296 = tosa.reduce_max %2295 {axis = 2 : i32} : (tensor<32x40x40xf32>) -> tensor<32x40x1xf32> + %2297 = tosa.sub %2295, %2296 : (tensor<32x40x40xf32>, tensor<32x40x1xf32>) -> tensor<32x40x40xf32> + %2298 = math.exp %2297 : tensor<32x40x40xf32> + %2299 = tosa.reduce_sum %2298 {axis = 2 : i32} : (tensor<32x40x40xf32>) -> tensor<32x40x1xf32> + %2300 = tosa.log %2299 : (tensor<32x40x1xf32>) -> tensor<32x40x1xf32> + %2301 = tosa.add %2296, %2300 : (tensor<32x40x1xf32>, tensor<32x40x1xf32>) -> tensor<32x40x1xf32> + %2302 = tosa.sub %2295, %2301 : (tensor<32x40x40xf32>, tensor<32x40x1xf32>) -> tensor<32x40x40xf32> + %2303 = math.exp %2302 : tensor<32x40x40xf32> + %2304 = tosa.reshape %2301 {new_shape = array} : (tensor<32x40x1xf32>) -> tensor<1x32x40xf32> + %2305 = tosa.reshape %2268 {new_shape = array} : (tensor<1x32x40x128xf32>) -> tensor<32x40x128xf32> + %2306 = tosa.matmul %2303, %2305 : (tensor<32x40x40xf32>, tensor<32x40x128xf32>) -> tensor<32x40x128xf32> + %2307 = tosa.reshape %2306 {new_shape = array} : (tensor<32x40x128xf32>) -> tensor<1x32x40x128xf32> + %2308 = "tosa.const"() <{value = dense<[0, 2, 1, 3]> : tensor<4xi32>}> : () -> tensor<4xi32> + %2309 = tosa.transpose %2307, %2308 : (tensor<1x32x40x128xf32>, tensor<4xi32>) -> tensor<1x40x32x128xf32> + %2310 = tosa.reshape %2309 {new_shape = array} : (tensor<1x40x32x128xf32>) -> tensor<1x40x4096xf32> + %2311 = "tosa.const"() <{value = dense<[1, 0]> : tensor<2xi32>}> : () -> tensor<2xi32> + %2312 = tosa.transpose %arg178, %2311 : (tensor<4096x4096xf32>, tensor<2xi32>) -> tensor<4096x4096xf32> + %2313 = tosa.reshape %2310 {new_shape = array} : (tensor<1x40x4096xf32>) -> tensor<40x4096xf32> + %cst_539 = arith.constant dense<0.000000e+00> : tensor<40x4096xf32> + %2314 = linalg.matmul {cast = #linalg.type_fn} ins(%2313, %2312 : tensor<40x4096xf32>, 
tensor<4096x4096xf32>) outs(%cst_539 : tensor<40x4096xf32>) -> tensor<40x4096xf32> + %2315 = tosa.reshape %2314 {new_shape = array} : (tensor<40x4096xf32>) -> tensor<1x40x4096xf32> + %2316 = tosa.add %2232, %2315 : (tensor<1x40x4096xf32>, tensor<1x40x4096xf32>) -> tensor<1x40x4096xf32> + %2317 = tensor.empty() : tensor<1x40x4096xf32> + %c2_i32_540 = arith.constant 2 : i32 + %2318 = linalg.generic {indexing_maps = [#map4, #map4], iterator_types = ["parallel", "parallel", "parallel"]} ins(%2316 : tensor<1x40x4096xf32>) outs(%2317 : tensor<1x40x4096xf32>) { + ^bb0(%in: f32, %out: f32): + %3745 = math.fpowi %in, %c2_i32_540 : f32, i32 + linalg.yield %3745 : f32 + } -> tensor<1x40x4096xf32> + %2319 = tosa.reduce_sum %2318 {axis = 2 : i32} : (tensor<1x40x4096xf32>) -> tensor<1x40x1xf32> + %2320 = "tosa.const"() <{value = dense<4.096000e+03> : tensor<1xf32>}> : () -> tensor<1xf32> + %2321 = tosa.reciprocal %2320 : (tensor<1xf32>) -> tensor<1xf32> + %2322 = tosa.mul %2321, %2319 {shift = 0 : i8} : (tensor<1xf32>, tensor<1x40x1xf32>) -> tensor<1x40x1xf32> + %2323 = "tosa.const"() <{value = dense<9.99999974E-6> : tensor<1x40x1xf32>}> : () -> tensor<1x40x1xf32> + %2324 = tosa.add %2322, %2323 : (tensor<1x40x1xf32>, tensor<1x40x1xf32>) -> tensor<1x40x1xf32> + %2325 = tosa.rsqrt %2324 : (tensor<1x40x1xf32>) -> tensor<1x40x1xf32> + %2326 = tosa.mul %2316, %2325 {shift = 0 : i8} : (tensor<1x40x4096xf32>, tensor<1x40x1xf32>) -> tensor<1x40x4096xf32> + %2327 = tosa.reshape %arg179 {new_shape = array} : (tensor<4096xf32>) -> tensor<1x1x4096xf32> + %2328 = tosa.mul %2327, %2326 {shift = 0 : i8} : (tensor<1x1x4096xf32>, tensor<1x40x4096xf32>) -> tensor<1x40x4096xf32> + %2329 = "tosa.const"() <{value = dense<[1, 0]> : tensor<2xi32>}> : () -> tensor<2xi32> + %2330 = tosa.transpose %arg180, %2329 : (tensor<11008x4096xf32>, tensor<2xi32>) -> tensor<4096x11008xf32> + %2331 = tosa.reshape %2328 {new_shape = array} : (tensor<1x40x4096xf32>) -> tensor<40x4096xf32> + %cst_541 = arith.constant 
dense<0.000000e+00> : tensor<40x11008xf32> + %2332 = linalg.matmul {cast = #linalg.type_fn} ins(%2331, %2330 : tensor<40x4096xf32>, tensor<4096x11008xf32>) outs(%cst_541 : tensor<40x11008xf32>) -> tensor<40x11008xf32> + %2333 = tosa.reshape %2332 {new_shape = array} : (tensor<40x11008xf32>) -> tensor<1x40x11008xf32> + %2334 = tosa.sigmoid %2333 : (tensor<1x40x11008xf32>) -> tensor<1x40x11008xf32> + %2335 = tosa.mul %2333, %2334 {shift = 0 : i8} : (tensor<1x40x11008xf32>, tensor<1x40x11008xf32>) -> tensor<1x40x11008xf32> + %2336 = "tosa.const"() <{value = dense<[1, 0]> : tensor<2xi32>}> : () -> tensor<2xi32> + %2337 = tosa.transpose %arg181, %2336 : (tensor<11008x4096xf32>, tensor<2xi32>) -> tensor<4096x11008xf32> + %2338 = tosa.reshape %2328 {new_shape = array} : (tensor<1x40x4096xf32>) -> tensor<40x4096xf32> + %cst_542 = arith.constant dense<0.000000e+00> : tensor<40x11008xf32> + %2339 = linalg.matmul {cast = #linalg.type_fn} ins(%2338, %2337 : tensor<40x4096xf32>, tensor<4096x11008xf32>) outs(%cst_542 : tensor<40x11008xf32>) -> tensor<40x11008xf32> + %2340 = tosa.reshape %2339 {new_shape = array} : (tensor<40x11008xf32>) -> tensor<1x40x11008xf32> + %2341 = tosa.mul %2335, %2340 {shift = 0 : i8} : (tensor<1x40x11008xf32>, tensor<1x40x11008xf32>) -> tensor<1x40x11008xf32> + %2342 = "tosa.const"() <{value = dense<[1, 0]> : tensor<2xi32>}> : () -> tensor<2xi32> + %2343 = tosa.transpose %arg182, %2342 : (tensor<4096x11008xf32>, tensor<2xi32>) -> tensor<11008x4096xf32> + %2344 = tosa.reshape %2341 {new_shape = array} : (tensor<1x40x11008xf32>) -> tensor<40x11008xf32> + %cst_543 = arith.constant dense<0.000000e+00> : tensor<40x4096xf32> + %2345 = linalg.matmul {cast = #linalg.type_fn} ins(%2344, %2343 : tensor<40x11008xf32>, tensor<11008x4096xf32>) outs(%cst_543 : tensor<40x4096xf32>) -> tensor<40x4096xf32> + %2346 = tosa.reshape %2345 {new_shape = array} : (tensor<40x4096xf32>) -> tensor<1x40x4096xf32> + %2347 = tosa.add %2316, %2346 : (tensor<1x40x4096xf32>, 
tensor<1x40x4096xf32>) -> tensor<1x40x4096xf32> + %2348 = tensor.empty() : tensor<1x40x4096xf32> + %c2_i32_544 = arith.constant 2 : i32 + %2349 = linalg.generic {indexing_maps = [#map4, #map4], iterator_types = ["parallel", "parallel", "parallel"]} ins(%2347 : tensor<1x40x4096xf32>) outs(%2348 : tensor<1x40x4096xf32>) { + ^bb0(%in: f32, %out: f32): + %3745 = math.fpowi %in, %c2_i32_544 : f32, i32 + linalg.yield %3745 : f32 + } -> tensor<1x40x4096xf32> + %2350 = tosa.reduce_sum %2349 {axis = 2 : i32} : (tensor<1x40x4096xf32>) -> tensor<1x40x1xf32> + %2351 = "tosa.const"() <{value = dense<4.096000e+03> : tensor<1xf32>}> : () -> tensor<1xf32> + %2352 = tosa.reciprocal %2351 : (tensor<1xf32>) -> tensor<1xf32> + %2353 = tosa.mul %2352, %2350 {shift = 0 : i8} : (tensor<1xf32>, tensor<1x40x1xf32>) -> tensor<1x40x1xf32> + %2354 = "tosa.const"() <{value = dense<9.99999974E-6> : tensor<1x40x1xf32>}> : () -> tensor<1x40x1xf32> + %2355 = tosa.add %2353, %2354 : (tensor<1x40x1xf32>, tensor<1x40x1xf32>) -> tensor<1x40x1xf32> + %2356 = tosa.rsqrt %2355 : (tensor<1x40x1xf32>) -> tensor<1x40x1xf32> + %2357 = tosa.mul %2347, %2356 {shift = 0 : i8} : (tensor<1x40x4096xf32>, tensor<1x40x1xf32>) -> tensor<1x40x4096xf32> + %2358 = tosa.reshape %arg183 {new_shape = array} : (tensor<4096xf32>) -> tensor<1x1x4096xf32> + %2359 = tosa.mul %2358, %2357 {shift = 0 : i8} : (tensor<1x1x4096xf32>, tensor<1x40x4096xf32>) -> tensor<1x40x4096xf32> + %2360 = "tosa.const"() <{value = dense<[1, 0]> : tensor<2xi32>}> : () -> tensor<2xi32> + %2361 = tosa.transpose %arg184, %2360 : (tensor<4096x4096xf32>, tensor<2xi32>) -> tensor<4096x4096xf32> + %2362 = tosa.reshape %2359 {new_shape = array} : (tensor<1x40x4096xf32>) -> tensor<40x4096xf32> + %cst_545 = arith.constant dense<0.000000e+00> : tensor<40x4096xf32> + %2363 = linalg.matmul {cast = #linalg.type_fn} ins(%2362, %2361 : tensor<40x4096xf32>, tensor<4096x4096xf32>) outs(%cst_545 : tensor<40x4096xf32>) -> tensor<40x4096xf32> + %2364 = tosa.reshape 
%2363 {new_shape = array} : (tensor<40x4096xf32>) -> tensor<1x40x4096xf32> + %2365 = "tosa.const"() <{value = dense<[1, 0]> : tensor<2xi32>}> : () -> tensor<2xi32> + %2366 = tosa.transpose %arg185, %2365 : (tensor<4096x4096xf32>, tensor<2xi32>) -> tensor<4096x4096xf32> + %2367 = tosa.reshape %2359 {new_shape = array} : (tensor<1x40x4096xf32>) -> tensor<40x4096xf32> + %cst_546 = arith.constant dense<0.000000e+00> : tensor<40x4096xf32> + %2368 = linalg.matmul {cast = #linalg.type_fn} ins(%2367, %2366 : tensor<40x4096xf32>, tensor<4096x4096xf32>) outs(%cst_546 : tensor<40x4096xf32>) -> tensor<40x4096xf32> + %2369 = tosa.reshape %2368 {new_shape = array} : (tensor<40x4096xf32>) -> tensor<1x40x4096xf32> + %2370 = "tosa.const"() <{value = dense<[1, 0]> : tensor<2xi32>}> : () -> tensor<2xi32> + %2371 = tosa.transpose %arg186, %2370 : (tensor<4096x4096xf32>, tensor<2xi32>) -> tensor<4096x4096xf32> + %2372 = tosa.reshape %2359 {new_shape = array} : (tensor<1x40x4096xf32>) -> tensor<40x4096xf32> + %cst_547 = arith.constant dense<0.000000e+00> : tensor<40x4096xf32> + %2373 = linalg.matmul {cast = #linalg.type_fn} ins(%2372, %2371 : tensor<40x4096xf32>, tensor<4096x4096xf32>) outs(%cst_547 : tensor<40x4096xf32>) -> tensor<40x4096xf32> + %2374 = tosa.reshape %2373 {new_shape = array} : (tensor<40x4096xf32>) -> tensor<1x40x4096xf32> + %2375 = tosa.reshape %2364 {new_shape = array} : (tensor<1x40x4096xf32>) -> tensor<1x40x32x128xf32> + %2376 = "tosa.const"() <{value = dense<[0, 2, 1, 3]> : tensor<4xi32>}> : () -> tensor<4xi32> + %2377 = tosa.transpose %2375, %2376 : (tensor<1x40x32x128xf32>, tensor<4xi32>) -> tensor<1x32x40x128xf32> + %2378 = tosa.reshape %2369 {new_shape = array} : (tensor<1x40x4096xf32>) -> tensor<1x40x32x128xf32> + %2379 = "tosa.const"() <{value = dense<[0, 2, 1, 3]> : tensor<4xi32>}> : () -> tensor<4xi32> + %2380 = tosa.transpose %2378, %2379 : (tensor<1x40x32x128xf32>, tensor<4xi32>) -> tensor<1x32x40x128xf32> + %2381 = tosa.reshape %2374 {new_shape = array} 
: (tensor<1x40x4096xf32>) -> tensor<1x40x32x128xf32> + %2382 = "tosa.const"() <{value = dense<[0, 2, 1, 3]> : tensor<4xi32>}> : () -> tensor<4xi32> + %2383 = tosa.transpose %2381, %2382 : (tensor<1x40x32x128xf32>, tensor<4xi32>) -> tensor<1x32x40x128xf32> + %2384 = tosa.reshape %45 {new_shape = array} : (tensor<1x40x128xf32>) -> tensor<1x1x40x128xf32> + %2385 = tosa.reshape %47 {new_shape = array} : (tensor<1x40x128xf32>) -> tensor<1x1x40x128xf32> + %2386 = tosa.mul %2377, %2384 {shift = 0 : i8} : (tensor<1x32x40x128xf32>, tensor<1x1x40x128xf32>) -> tensor<1x32x40x128xf32> + %extracted_slice_548 = tensor.extract_slice %2377[0, 0, 0, 0] [1, 32, 40, 64] [1, 1, 1, 1] : tensor<1x32x40x128xf32> to tensor<1x32x40x64xf32> + %extracted_slice_549 = tensor.extract_slice %2377[0, 0, 0, 64] [1, 32, 40, 64] [1, 1, 1, 1] : tensor<1x32x40x128xf32> to tensor<1x32x40x64xf32> + %2387 = tensor.empty() : tensor<1x32x40x64xf32> + %2388 = linalg.negf ins(%extracted_slice_549 : tensor<1x32x40x64xf32>) outs(%2387 : tensor<1x32x40x64xf32>) -> tensor<1x32x40x64xf32> + %2389 = tensor.empty() : tensor<1x32x40x128xf32> + %inserted_slice_550 = tensor.insert_slice %2388 into %2389[0, 0, 0, 0] [1, 32, 40, 64] [1, 1, 1, 1] : tensor<1x32x40x64xf32> into tensor<1x32x40x128xf32> + %inserted_slice_551 = tensor.insert_slice %extracted_slice_548 into %inserted_slice_550[0, 0, 0, 64] [1, 32, 40, 64] [1, 1, 1, 1] : tensor<1x32x40x64xf32> into tensor<1x32x40x128xf32> + %2390 = tosa.mul %inserted_slice_551, %2385 {shift = 0 : i8} : (tensor<1x32x40x128xf32>, tensor<1x1x40x128xf32>) -> tensor<1x32x40x128xf32> + %2391 = tosa.add %2386, %2390 : (tensor<1x32x40x128xf32>, tensor<1x32x40x128xf32>) -> tensor<1x32x40x128xf32> + %2392 = tosa.mul %2380, %2384 {shift = 0 : i8} : (tensor<1x32x40x128xf32>, tensor<1x1x40x128xf32>) -> tensor<1x32x40x128xf32> + %extracted_slice_552 = tensor.extract_slice %2380[0, 0, 0, 0] [1, 32, 40, 64] [1, 1, 1, 1] : tensor<1x32x40x128xf32> to tensor<1x32x40x64xf32> + %extracted_slice_553 
= tensor.extract_slice %2380[0, 0, 0, 64] [1, 32, 40, 64] [1, 1, 1, 1] : tensor<1x32x40x128xf32> to tensor<1x32x40x64xf32> + %2393 = tensor.empty() : tensor<1x32x40x64xf32> + %2394 = linalg.negf ins(%extracted_slice_553 : tensor<1x32x40x64xf32>) outs(%2393 : tensor<1x32x40x64xf32>) -> tensor<1x32x40x64xf32> + %2395 = tensor.empty() : tensor<1x32x40x128xf32> + %inserted_slice_554 = tensor.insert_slice %2394 into %2395[0, 0, 0, 0] [1, 32, 40, 64] [1, 1, 1, 1] : tensor<1x32x40x64xf32> into tensor<1x32x40x128xf32> + %inserted_slice_555 = tensor.insert_slice %extracted_slice_552 into %inserted_slice_554[0, 0, 0, 64] [1, 32, 40, 64] [1, 1, 1, 1] : tensor<1x32x40x64xf32> into tensor<1x32x40x128xf32> + %2396 = tosa.mul %inserted_slice_555, %2385 {shift = 0 : i8} : (tensor<1x32x40x128xf32>, tensor<1x1x40x128xf32>) -> tensor<1x32x40x128xf32> + %2397 = tosa.add %2392, %2396 : (tensor<1x32x40x128xf32>, tensor<1x32x40x128xf32>) -> tensor<1x32x40x128xf32> + %2398 = tosa.reshape %19 {new_shape = array} : (tensor<40x41xf32>) -> tensor<1x40x41xf32> + %2399 = tosa.reshape %2398 {new_shape = array} : (tensor<1x40x41xf32>) -> tensor<1x1x40x41xf32> + %extracted_slice_556 = tensor.extract_slice %2399[0, 0, 0, 0] [1, 1, 40, 41] [1, 1, 1, 1] : tensor<1x1x40x41xf32> to tensor<1x1x40x41xf32> + %extracted_slice_557 = tensor.extract_slice %extracted_slice_556[0, 0, 0, 0] [1, 1, 40, 41] [1, 1, 1, 1] : tensor<1x1x40x41xf32> to tensor<1x1x40x41xf32> + %2400 = "tosa.const"() <{value = dense<0.000000e+00> : tensor<1x1x40x41xf32>}> : () -> tensor<1x1x40x41xf32> + %2401 = tosa.add %extracted_slice_557, %2400 : (tensor<1x1x40x41xf32>, tensor<1x1x40x41xf32>) -> tensor<1x1x40x41xf32> + %extracted_slice_558 = tensor.extract_slice %2401[0, 0, 0, 0] [1, 1, 40, 41] [1, 1, 1, 1] : tensor<1x1x40x41xf32> to tensor<1x1x40x41xf32> + %extracted_slice_559 = tensor.extract_slice %extracted_slice_558[0, 0, 0, 0] [1, 1, 40, 41] [1, 1, 1, 1] : tensor<1x1x40x41xf32> to tensor<1x1x40x41xf32> + %extracted_slice_560 = 
tensor.extract_slice %extracted_slice_559[0, 0, 0, 0] [1, 1, 40, 41] [1, 1, 1, 1] : tensor<1x1x40x41xf32> to tensor<1x1x40x41xf32> + %extracted_slice_561 = tensor.extract_slice %extracted_slice_560[0, 0, 0, 0] [1, 1, 40, 40] [1, 1, 1, 1] : tensor<1x1x40x41xf32> to tensor<1x1x40x40xf32> + %cst_562 = arith.constant 0.000000e+00 : f32 + %splat_563 = tensor.splat %cst_562 : tensor<40x40xf32> + %2402 = tosa.reshape %extracted_slice_561 {new_shape = array} : (tensor<1x1x40x40xf32>) -> tensor<40x40xf32> + %2403 = tosa.add %splat_563, %2402 : (tensor<40x40xf32>, tensor<40x40xf32>) -> tensor<40x40xf32> + %2404 = "tosa.const"() <{value = dense<[0, 1, 3, 2]> : tensor<4xi32>}> : () -> tensor<4xi32> + %2405 = tosa.transpose %2397, %2404 : (tensor<1x32x40x128xf32>, tensor<4xi32>) -> tensor<1x32x128x40xf32> + %2406 = tosa.reshape %2391 {new_shape = array} : (tensor<1x32x40x128xf32>) -> tensor<32x40x128xf32> + %2407 = tosa.reshape %2405 {new_shape = array} : (tensor<1x32x128x40xf32>) -> tensor<32x128x40xf32> + %2408 = tosa.matmul %2406, %2407 : (tensor<32x40x128xf32>, tensor<32x128x40xf32>) -> tensor<32x40x40xf32> + %cst_564 = arith.constant 0.0883883461 : f32 + %splat_565 = tensor.splat %cst_564 : tensor<32x40x40xf32> + %2409 = tosa.mul %2408, %splat_565 {shift = 0 : i8} : (tensor<32x40x40xf32>, tensor<32x40x40xf32>) -> tensor<32x40x40xf32> + %2410 = tosa.add %2409, %2403 : (tensor<32x40x40xf32>, tensor<40x40xf32>) -> tensor<32x40x40xf32> + %2411 = tosa.reduce_max %2410 {axis = 2 : i32} : (tensor<32x40x40xf32>) -> tensor<32x40x1xf32> + %2412 = tosa.sub %2410, %2411 : (tensor<32x40x40xf32>, tensor<32x40x1xf32>) -> tensor<32x40x40xf32> + %2413 = math.exp %2412 : tensor<32x40x40xf32> + %2414 = tosa.reduce_sum %2413 {axis = 2 : i32} : (tensor<32x40x40xf32>) -> tensor<32x40x1xf32> + %2415 = tosa.log %2414 : (tensor<32x40x1xf32>) -> tensor<32x40x1xf32> + %2416 = tosa.add %2411, %2415 : (tensor<32x40x1xf32>, tensor<32x40x1xf32>) -> tensor<32x40x1xf32> + %2417 = tosa.sub %2410, %2416 : 
(tensor<32x40x40xf32>, tensor<32x40x1xf32>) -> tensor<32x40x40xf32> + %2418 = math.exp %2417 : tensor<32x40x40xf32> + %2419 = tosa.reshape %2416 {new_shape = array} : (tensor<32x40x1xf32>) -> tensor<1x32x40xf32> + %2420 = tosa.reshape %2383 {new_shape = array} : (tensor<1x32x40x128xf32>) -> tensor<32x40x128xf32> + %2421 = tosa.matmul %2418, %2420 : (tensor<32x40x40xf32>, tensor<32x40x128xf32>) -> tensor<32x40x128xf32> + %2422 = tosa.reshape %2421 {new_shape = array} : (tensor<32x40x128xf32>) -> tensor<1x32x40x128xf32> + %2423 = "tosa.const"() <{value = dense<[0, 2, 1, 3]> : tensor<4xi32>}> : () -> tensor<4xi32> + %2424 = tosa.transpose %2422, %2423 : (tensor<1x32x40x128xf32>, tensor<4xi32>) -> tensor<1x40x32x128xf32> + %2425 = tosa.reshape %2424 {new_shape = array} : (tensor<1x40x32x128xf32>) -> tensor<1x40x4096xf32> + %2426 = "tosa.const"() <{value = dense<[1, 0]> : tensor<2xi32>}> : () -> tensor<2xi32> + %2427 = tosa.transpose %arg187, %2426 : (tensor<4096x4096xf32>, tensor<2xi32>) -> tensor<4096x4096xf32> + %2428 = tosa.reshape %2425 {new_shape = array} : (tensor<1x40x4096xf32>) -> tensor<40x4096xf32> + %cst_566 = arith.constant dense<0.000000e+00> : tensor<40x4096xf32> + %2429 = linalg.matmul {cast = #linalg.type_fn} ins(%2428, %2427 : tensor<40x4096xf32>, tensor<4096x4096xf32>) outs(%cst_566 : tensor<40x4096xf32>) -> tensor<40x4096xf32> + %2430 = tosa.reshape %2429 {new_shape = array} : (tensor<40x4096xf32>) -> tensor<1x40x4096xf32> + %2431 = tosa.add %2347, %2430 : (tensor<1x40x4096xf32>, tensor<1x40x4096xf32>) -> tensor<1x40x4096xf32> + %2432 = tensor.empty() : tensor<1x40x4096xf32> + %c2_i32_567 = arith.constant 2 : i32 + %2433 = linalg.generic {indexing_maps = [#map4, #map4], iterator_types = ["parallel", "parallel", "parallel"]} ins(%2431 : tensor<1x40x4096xf32>) outs(%2432 : tensor<1x40x4096xf32>) { + ^bb0(%in: f32, %out: f32): + %3745 = math.fpowi %in, %c2_i32_567 : f32, i32 + linalg.yield %3745 : f32 + } -> tensor<1x40x4096xf32> + %2434 = 
tosa.reduce_sum %2433 {axis = 2 : i32} : (tensor<1x40x4096xf32>) -> tensor<1x40x1xf32> + %2435 = "tosa.const"() <{value = dense<4.096000e+03> : tensor<1xf32>}> : () -> tensor<1xf32> + %2436 = tosa.reciprocal %2435 : (tensor<1xf32>) -> tensor<1xf32> + %2437 = tosa.mul %2436, %2434 {shift = 0 : i8} : (tensor<1xf32>, tensor<1x40x1xf32>) -> tensor<1x40x1xf32> + %2438 = "tosa.const"() <{value = dense<9.99999974E-6> : tensor<1x40x1xf32>}> : () -> tensor<1x40x1xf32> + %2439 = tosa.add %2437, %2438 : (tensor<1x40x1xf32>, tensor<1x40x1xf32>) -> tensor<1x40x1xf32> + %2440 = tosa.rsqrt %2439 : (tensor<1x40x1xf32>) -> tensor<1x40x1xf32> + %2441 = tosa.mul %2431, %2440 {shift = 0 : i8} : (tensor<1x40x4096xf32>, tensor<1x40x1xf32>) -> tensor<1x40x4096xf32> + %2442 = tosa.reshape %arg188 {new_shape = array} : (tensor<4096xf32>) -> tensor<1x1x4096xf32> + %2443 = tosa.mul %2442, %2441 {shift = 0 : i8} : (tensor<1x1x4096xf32>, tensor<1x40x4096xf32>) -> tensor<1x40x4096xf32> + %2444 = "tosa.const"() <{value = dense<[1, 0]> : tensor<2xi32>}> : () -> tensor<2xi32> + %2445 = tosa.transpose %arg189, %2444 : (tensor<11008x4096xf32>, tensor<2xi32>) -> tensor<4096x11008xf32> + %2446 = tosa.reshape %2443 {new_shape = array} : (tensor<1x40x4096xf32>) -> tensor<40x4096xf32> + %cst_568 = arith.constant dense<0.000000e+00> : tensor<40x11008xf32> + %2447 = linalg.matmul {cast = #linalg.type_fn} ins(%2446, %2445 : tensor<40x4096xf32>, tensor<4096x11008xf32>) outs(%cst_568 : tensor<40x11008xf32>) -> tensor<40x11008xf32> + %2448 = tosa.reshape %2447 {new_shape = array} : (tensor<40x11008xf32>) -> tensor<1x40x11008xf32> + %2449 = tosa.sigmoid %2448 : (tensor<1x40x11008xf32>) -> tensor<1x40x11008xf32> + %2450 = tosa.mul %2448, %2449 {shift = 0 : i8} : (tensor<1x40x11008xf32>, tensor<1x40x11008xf32>) -> tensor<1x40x11008xf32> + %2451 = "tosa.const"() <{value = dense<[1, 0]> : tensor<2xi32>}> : () -> tensor<2xi32> + %2452 = tosa.transpose %arg190, %2451 : (tensor<11008x4096xf32>, tensor<2xi32>) -> 
tensor<4096x11008xf32> + %2453 = tosa.reshape %2443 {new_shape = array} : (tensor<1x40x4096xf32>) -> tensor<40x4096xf32> + %cst_569 = arith.constant dense<0.000000e+00> : tensor<40x11008xf32> + %2454 = linalg.matmul {cast = #linalg.type_fn} ins(%2453, %2452 : tensor<40x4096xf32>, tensor<4096x11008xf32>) outs(%cst_569 : tensor<40x11008xf32>) -> tensor<40x11008xf32> + %2455 = tosa.reshape %2454 {new_shape = array} : (tensor<40x11008xf32>) -> tensor<1x40x11008xf32> + %2456 = tosa.mul %2450, %2455 {shift = 0 : i8} : (tensor<1x40x11008xf32>, tensor<1x40x11008xf32>) -> tensor<1x40x11008xf32> + %2457 = "tosa.const"() <{value = dense<[1, 0]> : tensor<2xi32>}> : () -> tensor<2xi32> + %2458 = tosa.transpose %arg191, %2457 : (tensor<4096x11008xf32>, tensor<2xi32>) -> tensor<11008x4096xf32> + %2459 = tosa.reshape %2456 {new_shape = array} : (tensor<1x40x11008xf32>) -> tensor<40x11008xf32> + %cst_570 = arith.constant dense<0.000000e+00> : tensor<40x4096xf32> + %2460 = linalg.matmul {cast = #linalg.type_fn} ins(%2459, %2458 : tensor<40x11008xf32>, tensor<11008x4096xf32>) outs(%cst_570 : tensor<40x4096xf32>) -> tensor<40x4096xf32> + %2461 = tosa.reshape %2460 {new_shape = array} : (tensor<40x4096xf32>) -> tensor<1x40x4096xf32> + %2462 = tosa.add %2431, %2461 : (tensor<1x40x4096xf32>, tensor<1x40x4096xf32>) -> tensor<1x40x4096xf32> + %2463 = tensor.empty() : tensor<1x40x4096xf32> + %c2_i32_571 = arith.constant 2 : i32 + %2464 = linalg.generic {indexing_maps = [#map4, #map4], iterator_types = ["parallel", "parallel", "parallel"]} ins(%2462 : tensor<1x40x4096xf32>) outs(%2463 : tensor<1x40x4096xf32>) { + ^bb0(%in: f32, %out: f32): + %3745 = math.fpowi %in, %c2_i32_571 : f32, i32 + linalg.yield %3745 : f32 + } -> tensor<1x40x4096xf32> + %2465 = tosa.reduce_sum %2464 {axis = 2 : i32} : (tensor<1x40x4096xf32>) -> tensor<1x40x1xf32> + %2466 = "tosa.const"() <{value = dense<4.096000e+03> : tensor<1xf32>}> : () -> tensor<1xf32> + %2467 = tosa.reciprocal %2466 : (tensor<1xf32>) -> 
tensor<1xf32> + %2468 = tosa.mul %2467, %2465 {shift = 0 : i8} : (tensor<1xf32>, tensor<1x40x1xf32>) -> tensor<1x40x1xf32> + %2469 = "tosa.const"() <{value = dense<9.99999974E-6> : tensor<1x40x1xf32>}> : () -> tensor<1x40x1xf32> + %2470 = tosa.add %2468, %2469 : (tensor<1x40x1xf32>, tensor<1x40x1xf32>) -> tensor<1x40x1xf32> + %2471 = tosa.rsqrt %2470 : (tensor<1x40x1xf32>) -> tensor<1x40x1xf32> + %2472 = tosa.mul %2462, %2471 {shift = 0 : i8} : (tensor<1x40x4096xf32>, tensor<1x40x1xf32>) -> tensor<1x40x4096xf32> + %2473 = tosa.reshape %arg192 {new_shape = array} : (tensor<4096xf32>) -> tensor<1x1x4096xf32> + %2474 = tosa.mul %2473, %2472 {shift = 0 : i8} : (tensor<1x1x4096xf32>, tensor<1x40x4096xf32>) -> tensor<1x40x4096xf32> + %2475 = "tosa.const"() <{value = dense<[1, 0]> : tensor<2xi32>}> : () -> tensor<2xi32> + %2476 = tosa.transpose %arg193, %2475 : (tensor<4096x4096xf32>, tensor<2xi32>) -> tensor<4096x4096xf32> + %2477 = tosa.reshape %2474 {new_shape = array} : (tensor<1x40x4096xf32>) -> tensor<40x4096xf32> + %cst_572 = arith.constant dense<0.000000e+00> : tensor<40x4096xf32> + %2478 = linalg.matmul {cast = #linalg.type_fn} ins(%2477, %2476 : tensor<40x4096xf32>, tensor<4096x4096xf32>) outs(%cst_572 : tensor<40x4096xf32>) -> tensor<40x4096xf32> + %2479 = tosa.reshape %2478 {new_shape = array} : (tensor<40x4096xf32>) -> tensor<1x40x4096xf32> + %2480 = "tosa.const"() <{value = dense<[1, 0]> : tensor<2xi32>}> : () -> tensor<2xi32> + %2481 = tosa.transpose %arg194, %2480 : (tensor<4096x4096xf32>, tensor<2xi32>) -> tensor<4096x4096xf32> + %2482 = tosa.reshape %2474 {new_shape = array} : (tensor<1x40x4096xf32>) -> tensor<40x4096xf32> + %cst_573 = arith.constant dense<0.000000e+00> : tensor<40x4096xf32> + %2483 = linalg.matmul {cast = #linalg.type_fn} ins(%2482, %2481 : tensor<40x4096xf32>, tensor<4096x4096xf32>) outs(%cst_573 : tensor<40x4096xf32>) -> tensor<40x4096xf32> + %2484 = tosa.reshape %2483 {new_shape = array} : (tensor<40x4096xf32>) -> 
tensor<1x40x4096xf32> + %2485 = "tosa.const"() <{value = dense<[1, 0]> : tensor<2xi32>}> : () -> tensor<2xi32> + %2486 = tosa.transpose %arg195, %2485 : (tensor<4096x4096xf32>, tensor<2xi32>) -> tensor<4096x4096xf32> + %2487 = tosa.reshape %2474 {new_shape = array} : (tensor<1x40x4096xf32>) -> tensor<40x4096xf32> + %cst_574 = arith.constant dense<0.000000e+00> : tensor<40x4096xf32> + %2488 = linalg.matmul {cast = #linalg.type_fn} ins(%2487, %2486 : tensor<40x4096xf32>, tensor<4096x4096xf32>) outs(%cst_574 : tensor<40x4096xf32>) -> tensor<40x4096xf32> + %2489 = tosa.reshape %2488 {new_shape = array} : (tensor<40x4096xf32>) -> tensor<1x40x4096xf32> + %2490 = tosa.reshape %2479 {new_shape = array} : (tensor<1x40x4096xf32>) -> tensor<1x40x32x128xf32> + %2491 = "tosa.const"() <{value = dense<[0, 2, 1, 3]> : tensor<4xi32>}> : () -> tensor<4xi32> + %2492 = tosa.transpose %2490, %2491 : (tensor<1x40x32x128xf32>, tensor<4xi32>) -> tensor<1x32x40x128xf32> + %2493 = tosa.reshape %2484 {new_shape = array} : (tensor<1x40x4096xf32>) -> tensor<1x40x32x128xf32> + %2494 = "tosa.const"() <{value = dense<[0, 2, 1, 3]> : tensor<4xi32>}> : () -> tensor<4xi32> + %2495 = tosa.transpose %2493, %2494 : (tensor<1x40x32x128xf32>, tensor<4xi32>) -> tensor<1x32x40x128xf32> + %2496 = tosa.reshape %2489 {new_shape = array} : (tensor<1x40x4096xf32>) -> tensor<1x40x32x128xf32> + %2497 = "tosa.const"() <{value = dense<[0, 2, 1, 3]> : tensor<4xi32>}> : () -> tensor<4xi32> + %2498 = tosa.transpose %2496, %2497 : (tensor<1x40x32x128xf32>, tensor<4xi32>) -> tensor<1x32x40x128xf32> + %2499 = tosa.reshape %45 {new_shape = array} : (tensor<1x40x128xf32>) -> tensor<1x1x40x128xf32> + %2500 = tosa.reshape %47 {new_shape = array} : (tensor<1x40x128xf32>) -> tensor<1x1x40x128xf32> + %2501 = tosa.mul %2492, %2499 {shift = 0 : i8} : (tensor<1x32x40x128xf32>, tensor<1x1x40x128xf32>) -> tensor<1x32x40x128xf32> + %extracted_slice_575 = tensor.extract_slice %2492[0, 0, 0, 0] [1, 32, 40, 64] [1, 1, 1, 1] : 
tensor<1x32x40x128xf32> to tensor<1x32x40x64xf32> + %extracted_slice_576 = tensor.extract_slice %2492[0, 0, 0, 64] [1, 32, 40, 64] [1, 1, 1, 1] : tensor<1x32x40x128xf32> to tensor<1x32x40x64xf32> + %2502 = tensor.empty() : tensor<1x32x40x64xf32> + %2503 = linalg.negf ins(%extracted_slice_576 : tensor<1x32x40x64xf32>) outs(%2502 : tensor<1x32x40x64xf32>) -> tensor<1x32x40x64xf32> + %2504 = tensor.empty() : tensor<1x32x40x128xf32> + %inserted_slice_577 = tensor.insert_slice %2503 into %2504[0, 0, 0, 0] [1, 32, 40, 64] [1, 1, 1, 1] : tensor<1x32x40x64xf32> into tensor<1x32x40x128xf32> + %inserted_slice_578 = tensor.insert_slice %extracted_slice_575 into %inserted_slice_577[0, 0, 0, 64] [1, 32, 40, 64] [1, 1, 1, 1] : tensor<1x32x40x64xf32> into tensor<1x32x40x128xf32> + %2505 = tosa.mul %inserted_slice_578, %2500 {shift = 0 : i8} : (tensor<1x32x40x128xf32>, tensor<1x1x40x128xf32>) -> tensor<1x32x40x128xf32> + %2506 = tosa.add %2501, %2505 : (tensor<1x32x40x128xf32>, tensor<1x32x40x128xf32>) -> tensor<1x32x40x128xf32> + %2507 = tosa.mul %2495, %2499 {shift = 0 : i8} : (tensor<1x32x40x128xf32>, tensor<1x1x40x128xf32>) -> tensor<1x32x40x128xf32> + %extracted_slice_579 = tensor.extract_slice %2495[0, 0, 0, 0] [1, 32, 40, 64] [1, 1, 1, 1] : tensor<1x32x40x128xf32> to tensor<1x32x40x64xf32> + %extracted_slice_580 = tensor.extract_slice %2495[0, 0, 0, 64] [1, 32, 40, 64] [1, 1, 1, 1] : tensor<1x32x40x128xf32> to tensor<1x32x40x64xf32> + %2508 = tensor.empty() : tensor<1x32x40x64xf32> + %2509 = linalg.negf ins(%extracted_slice_580 : tensor<1x32x40x64xf32>) outs(%2508 : tensor<1x32x40x64xf32>) -> tensor<1x32x40x64xf32> + %2510 = tensor.empty() : tensor<1x32x40x128xf32> + %inserted_slice_581 = tensor.insert_slice %2509 into %2510[0, 0, 0, 0] [1, 32, 40, 64] [1, 1, 1, 1] : tensor<1x32x40x64xf32> into tensor<1x32x40x128xf32> + %inserted_slice_582 = tensor.insert_slice %extracted_slice_579 into %inserted_slice_581[0, 0, 0, 64] [1, 32, 40, 64] [1, 1, 1, 1] : tensor<1x32x40x64xf32> 
into tensor<1x32x40x128xf32> + %2511 = tosa.mul %inserted_slice_582, %2500 {shift = 0 : i8} : (tensor<1x32x40x128xf32>, tensor<1x1x40x128xf32>) -> tensor<1x32x40x128xf32> + %2512 = tosa.add %2507, %2511 : (tensor<1x32x40x128xf32>, tensor<1x32x40x128xf32>) -> tensor<1x32x40x128xf32> + %2513 = tosa.reshape %19 {new_shape = array} : (tensor<40x41xf32>) -> tensor<1x40x41xf32> + %2514 = tosa.reshape %2513 {new_shape = array} : (tensor<1x40x41xf32>) -> tensor<1x1x40x41xf32> + %extracted_slice_583 = tensor.extract_slice %2514[0, 0, 0, 0] [1, 1, 40, 41] [1, 1, 1, 1] : tensor<1x1x40x41xf32> to tensor<1x1x40x41xf32> + %extracted_slice_584 = tensor.extract_slice %extracted_slice_583[0, 0, 0, 0] [1, 1, 40, 41] [1, 1, 1, 1] : tensor<1x1x40x41xf32> to tensor<1x1x40x41xf32> + %2515 = "tosa.const"() <{value = dense<0.000000e+00> : tensor<1x1x40x41xf32>}> : () -> tensor<1x1x40x41xf32> + %2516 = tosa.add %extracted_slice_584, %2515 : (tensor<1x1x40x41xf32>, tensor<1x1x40x41xf32>) -> tensor<1x1x40x41xf32> + %extracted_slice_585 = tensor.extract_slice %2516[0, 0, 0, 0] [1, 1, 40, 41] [1, 1, 1, 1] : tensor<1x1x40x41xf32> to tensor<1x1x40x41xf32> + %extracted_slice_586 = tensor.extract_slice %extracted_slice_585[0, 0, 0, 0] [1, 1, 40, 41] [1, 1, 1, 1] : tensor<1x1x40x41xf32> to tensor<1x1x40x41xf32> + %extracted_slice_587 = tensor.extract_slice %extracted_slice_586[0, 0, 0, 0] [1, 1, 40, 41] [1, 1, 1, 1] : tensor<1x1x40x41xf32> to tensor<1x1x40x41xf32> + %extracted_slice_588 = tensor.extract_slice %extracted_slice_587[0, 0, 0, 0] [1, 1, 40, 40] [1, 1, 1, 1] : tensor<1x1x40x41xf32> to tensor<1x1x40x40xf32> + %cst_589 = arith.constant 0.000000e+00 : f32 + %splat_590 = tensor.splat %cst_589 : tensor<40x40xf32> + %2517 = tosa.reshape %extracted_slice_588 {new_shape = array} : (tensor<1x1x40x40xf32>) -> tensor<40x40xf32> + %2518 = tosa.add %splat_590, %2517 : (tensor<40x40xf32>, tensor<40x40xf32>) -> tensor<40x40xf32> + %2519 = "tosa.const"() <{value = dense<[0, 1, 3, 2]> : tensor<4xi32>}> : 
() -> tensor<4xi32> + %2520 = tosa.transpose %2512, %2519 : (tensor<1x32x40x128xf32>, tensor<4xi32>) -> tensor<1x32x128x40xf32> + %2521 = tosa.reshape %2506 {new_shape = array} : (tensor<1x32x40x128xf32>) -> tensor<32x40x128xf32> + %2522 = tosa.reshape %2520 {new_shape = array} : (tensor<1x32x128x40xf32>) -> tensor<32x128x40xf32> + %2523 = tosa.matmul %2521, %2522 : (tensor<32x40x128xf32>, tensor<32x128x40xf32>) -> tensor<32x40x40xf32> + %cst_591 = arith.constant 0.0883883461 : f32 + %splat_592 = tensor.splat %cst_591 : tensor<32x40x40xf32> + %2524 = tosa.mul %2523, %splat_592 {shift = 0 : i8} : (tensor<32x40x40xf32>, tensor<32x40x40xf32>) -> tensor<32x40x40xf32> + %2525 = tosa.add %2524, %2518 : (tensor<32x40x40xf32>, tensor<40x40xf32>) -> tensor<32x40x40xf32> + %2526 = tosa.reduce_max %2525 {axis = 2 : i32} : (tensor<32x40x40xf32>) -> tensor<32x40x1xf32> + %2527 = tosa.sub %2525, %2526 : (tensor<32x40x40xf32>, tensor<32x40x1xf32>) -> tensor<32x40x40xf32> + %2528 = math.exp %2527 : tensor<32x40x40xf32> + %2529 = tosa.reduce_sum %2528 {axis = 2 : i32} : (tensor<32x40x40xf32>) -> tensor<32x40x1xf32> + %2530 = tosa.log %2529 : (tensor<32x40x1xf32>) -> tensor<32x40x1xf32> + %2531 = tosa.add %2526, %2530 : (tensor<32x40x1xf32>, tensor<32x40x1xf32>) -> tensor<32x40x1xf32> + %2532 = tosa.sub %2525, %2531 : (tensor<32x40x40xf32>, tensor<32x40x1xf32>) -> tensor<32x40x40xf32> + %2533 = math.exp %2532 : tensor<32x40x40xf32> + %2534 = tosa.reshape %2531 {new_shape = array} : (tensor<32x40x1xf32>) -> tensor<1x32x40xf32> + %2535 = tosa.reshape %2498 {new_shape = array} : (tensor<1x32x40x128xf32>) -> tensor<32x40x128xf32> + %2536 = tosa.matmul %2533, %2535 : (tensor<32x40x40xf32>, tensor<32x40x128xf32>) -> tensor<32x40x128xf32> + %2537 = tosa.reshape %2536 {new_shape = array} : (tensor<32x40x128xf32>) -> tensor<1x32x40x128xf32> + %2538 = "tosa.const"() <{value = dense<[0, 2, 1, 3]> : tensor<4xi32>}> : () -> tensor<4xi32> + %2539 = tosa.transpose %2537, %2538 : 
(tensor<1x32x40x128xf32>, tensor<4xi32>) -> tensor<1x40x32x128xf32> + %2540 = tosa.reshape %2539 {new_shape = array} : (tensor<1x40x32x128xf32>) -> tensor<1x40x4096xf32> + %2541 = "tosa.const"() <{value = dense<[1, 0]> : tensor<2xi32>}> : () -> tensor<2xi32> + %2542 = tosa.transpose %arg196, %2541 : (tensor<4096x4096xf32>, tensor<2xi32>) -> tensor<4096x4096xf32> + %2543 = tosa.reshape %2540 {new_shape = array} : (tensor<1x40x4096xf32>) -> tensor<40x4096xf32> + %cst_593 = arith.constant dense<0.000000e+00> : tensor<40x4096xf32> + %2544 = linalg.matmul {cast = #linalg.type_fn} ins(%2543, %2542 : tensor<40x4096xf32>, tensor<4096x4096xf32>) outs(%cst_593 : tensor<40x4096xf32>) -> tensor<40x4096xf32> + %2545 = tosa.reshape %2544 {new_shape = array} : (tensor<40x4096xf32>) -> tensor<1x40x4096xf32> + %2546 = tosa.add %2462, %2545 : (tensor<1x40x4096xf32>, tensor<1x40x4096xf32>) -> tensor<1x40x4096xf32> + %2547 = tensor.empty() : tensor<1x40x4096xf32> + %c2_i32_594 = arith.constant 2 : i32 + %2548 = linalg.generic {indexing_maps = [#map4, #map4], iterator_types = ["parallel", "parallel", "parallel"]} ins(%2546 : tensor<1x40x4096xf32>) outs(%2547 : tensor<1x40x4096xf32>) { + ^bb0(%in: f32, %out: f32): + %3745 = math.fpowi %in, %c2_i32_594 : f32, i32 + linalg.yield %3745 : f32 + } -> tensor<1x40x4096xf32> + %2549 = tosa.reduce_sum %2548 {axis = 2 : i32} : (tensor<1x40x4096xf32>) -> tensor<1x40x1xf32> + %2550 = "tosa.const"() <{value = dense<4.096000e+03> : tensor<1xf32>}> : () -> tensor<1xf32> + %2551 = tosa.reciprocal %2550 : (tensor<1xf32>) -> tensor<1xf32> + %2552 = tosa.mul %2551, %2549 {shift = 0 : i8} : (tensor<1xf32>, tensor<1x40x1xf32>) -> tensor<1x40x1xf32> + %2553 = "tosa.const"() <{value = dense<9.99999974E-6> : tensor<1x40x1xf32>}> : () -> tensor<1x40x1xf32> + %2554 = tosa.add %2552, %2553 : (tensor<1x40x1xf32>, tensor<1x40x1xf32>) -> tensor<1x40x1xf32> + %2555 = tosa.rsqrt %2554 : (tensor<1x40x1xf32>) -> tensor<1x40x1xf32> + %2556 = tosa.mul %2546, %2555 {shift 
= 0 : i8} : (tensor<1x40x4096xf32>, tensor<1x40x1xf32>) -> tensor<1x40x4096xf32> + %2557 = tosa.reshape %arg197 {new_shape = array} : (tensor<4096xf32>) -> tensor<1x1x4096xf32> + %2558 = tosa.mul %2557, %2556 {shift = 0 : i8} : (tensor<1x1x4096xf32>, tensor<1x40x4096xf32>) -> tensor<1x40x4096xf32> + %2559 = "tosa.const"() <{value = dense<[1, 0]> : tensor<2xi32>}> : () -> tensor<2xi32> + %2560 = tosa.transpose %arg198, %2559 : (tensor<11008x4096xf32>, tensor<2xi32>) -> tensor<4096x11008xf32> + %2561 = tosa.reshape %2558 {new_shape = array} : (tensor<1x40x4096xf32>) -> tensor<40x4096xf32> + %cst_595 = arith.constant dense<0.000000e+00> : tensor<40x11008xf32> + %2562 = linalg.matmul {cast = #linalg.type_fn} ins(%2561, %2560 : tensor<40x4096xf32>, tensor<4096x11008xf32>) outs(%cst_595 : tensor<40x11008xf32>) -> tensor<40x11008xf32> + %2563 = tosa.reshape %2562 {new_shape = array} : (tensor<40x11008xf32>) -> tensor<1x40x11008xf32> + %2564 = tosa.sigmoid %2563 : (tensor<1x40x11008xf32>) -> tensor<1x40x11008xf32> + %2565 = tosa.mul %2563, %2564 {shift = 0 : i8} : (tensor<1x40x11008xf32>, tensor<1x40x11008xf32>) -> tensor<1x40x11008xf32> + %2566 = "tosa.const"() <{value = dense<[1, 0]> : tensor<2xi32>}> : () -> tensor<2xi32> + %2567 = tosa.transpose %arg199, %2566 : (tensor<11008x4096xf32>, tensor<2xi32>) -> tensor<4096x11008xf32> + %2568 = tosa.reshape %2558 {new_shape = array} : (tensor<1x40x4096xf32>) -> tensor<40x4096xf32> + %cst_596 = arith.constant dense<0.000000e+00> : tensor<40x11008xf32> + %2569 = linalg.matmul {cast = #linalg.type_fn} ins(%2568, %2567 : tensor<40x4096xf32>, tensor<4096x11008xf32>) outs(%cst_596 : tensor<40x11008xf32>) -> tensor<40x11008xf32> + %2570 = tosa.reshape %2569 {new_shape = array} : (tensor<40x11008xf32>) -> tensor<1x40x11008xf32> + %2571 = tosa.mul %2565, %2570 {shift = 0 : i8} : (tensor<1x40x11008xf32>, tensor<1x40x11008xf32>) -> tensor<1x40x11008xf32> + %2572 = "tosa.const"() <{value = dense<[1, 0]> : tensor<2xi32>}> : () -> 
tensor<2xi32> + %2573 = tosa.transpose %arg200, %2572 : (tensor<4096x11008xf32>, tensor<2xi32>) -> tensor<11008x4096xf32> + %2574 = tosa.reshape %2571 {new_shape = array} : (tensor<1x40x11008xf32>) -> tensor<40x11008xf32> + %cst_597 = arith.constant dense<0.000000e+00> : tensor<40x4096xf32> + %2575 = linalg.matmul {cast = #linalg.type_fn} ins(%2574, %2573 : tensor<40x11008xf32>, tensor<11008x4096xf32>) outs(%cst_597 : tensor<40x4096xf32>) -> tensor<40x4096xf32> + %2576 = tosa.reshape %2575 {new_shape = array} : (tensor<40x4096xf32>) -> tensor<1x40x4096xf32> + %2577 = tosa.add %2546, %2576 : (tensor<1x40x4096xf32>, tensor<1x40x4096xf32>) -> tensor<1x40x4096xf32> + %2578 = tensor.empty() : tensor<1x40x4096xf32> + %c2_i32_598 = arith.constant 2 : i32 + %2579 = linalg.generic {indexing_maps = [#map4, #map4], iterator_types = ["parallel", "parallel", "parallel"]} ins(%2577 : tensor<1x40x4096xf32>) outs(%2578 : tensor<1x40x4096xf32>) { + ^bb0(%in: f32, %out: f32): + %3745 = math.fpowi %in, %c2_i32_598 : f32, i32 + linalg.yield %3745 : f32 + } -> tensor<1x40x4096xf32> + %2580 = tosa.reduce_sum %2579 {axis = 2 : i32} : (tensor<1x40x4096xf32>) -> tensor<1x40x1xf32> + %2581 = "tosa.const"() <{value = dense<4.096000e+03> : tensor<1xf32>}> : () -> tensor<1xf32> + %2582 = tosa.reciprocal %2581 : (tensor<1xf32>) -> tensor<1xf32> + %2583 = tosa.mul %2582, %2580 {shift = 0 : i8} : (tensor<1xf32>, tensor<1x40x1xf32>) -> tensor<1x40x1xf32> + %2584 = "tosa.const"() <{value = dense<9.99999974E-6> : tensor<1x40x1xf32>}> : () -> tensor<1x40x1xf32> + %2585 = tosa.add %2583, %2584 : (tensor<1x40x1xf32>, tensor<1x40x1xf32>) -> tensor<1x40x1xf32> + %2586 = tosa.rsqrt %2585 : (tensor<1x40x1xf32>) -> tensor<1x40x1xf32> + %2587 = tosa.mul %2577, %2586 {shift = 0 : i8} : (tensor<1x40x4096xf32>, tensor<1x40x1xf32>) -> tensor<1x40x4096xf32> + %2588 = tosa.reshape %arg201 {new_shape = array} : (tensor<4096xf32>) -> tensor<1x1x4096xf32> + %2589 = tosa.mul %2588, %2587 {shift = 0 : i8} : 
(tensor<1x1x4096xf32>, tensor<1x40x4096xf32>) -> tensor<1x40x4096xf32> + %2590 = "tosa.const"() <{value = dense<[1, 0]> : tensor<2xi32>}> : () -> tensor<2xi32> + %2591 = tosa.transpose %arg202, %2590 : (tensor<4096x4096xf32>, tensor<2xi32>) -> tensor<4096x4096xf32> + %2592 = tosa.reshape %2589 {new_shape = array} : (tensor<1x40x4096xf32>) -> tensor<40x4096xf32> + %cst_599 = arith.constant dense<0.000000e+00> : tensor<40x4096xf32> + %2593 = linalg.matmul {cast = #linalg.type_fn} ins(%2592, %2591 : tensor<40x4096xf32>, tensor<4096x4096xf32>) outs(%cst_599 : tensor<40x4096xf32>) -> tensor<40x4096xf32> + %2594 = tosa.reshape %2593 {new_shape = array} : (tensor<40x4096xf32>) -> tensor<1x40x4096xf32> + %2595 = "tosa.const"() <{value = dense<[1, 0]> : tensor<2xi32>}> : () -> tensor<2xi32> + %2596 = tosa.transpose %arg203, %2595 : (tensor<4096x4096xf32>, tensor<2xi32>) -> tensor<4096x4096xf32> + %2597 = tosa.reshape %2589 {new_shape = array} : (tensor<1x40x4096xf32>) -> tensor<40x4096xf32> + %cst_600 = arith.constant dense<0.000000e+00> : tensor<40x4096xf32> + %2598 = linalg.matmul {cast = #linalg.type_fn} ins(%2597, %2596 : tensor<40x4096xf32>, tensor<4096x4096xf32>) outs(%cst_600 : tensor<40x4096xf32>) -> tensor<40x4096xf32> + %2599 = tosa.reshape %2598 {new_shape = array} : (tensor<40x4096xf32>) -> tensor<1x40x4096xf32> + %2600 = "tosa.const"() <{value = dense<[1, 0]> : tensor<2xi32>}> : () -> tensor<2xi32> + %2601 = tosa.transpose %arg204, %2600 : (tensor<4096x4096xf32>, tensor<2xi32>) -> tensor<4096x4096xf32> + %2602 = tosa.reshape %2589 {new_shape = array} : (tensor<1x40x4096xf32>) -> tensor<40x4096xf32> + %cst_601 = arith.constant dense<0.000000e+00> : tensor<40x4096xf32> + %2603 = linalg.matmul {cast = #linalg.type_fn} ins(%2602, %2601 : tensor<40x4096xf32>, tensor<4096x4096xf32>) outs(%cst_601 : tensor<40x4096xf32>) -> tensor<40x4096xf32> + %2604 = tosa.reshape %2603 {new_shape = array} : (tensor<40x4096xf32>) -> tensor<1x40x4096xf32> + %2605 = tosa.reshape %2594 
{new_shape = array} : (tensor<1x40x4096xf32>) -> tensor<1x40x32x128xf32> + %2606 = "tosa.const"() <{value = dense<[0, 2, 1, 3]> : tensor<4xi32>}> : () -> tensor<4xi32> + %2607 = tosa.transpose %2605, %2606 : (tensor<1x40x32x128xf32>, tensor<4xi32>) -> tensor<1x32x40x128xf32> + %2608 = tosa.reshape %2599 {new_shape = array} : (tensor<1x40x4096xf32>) -> tensor<1x40x32x128xf32> + %2609 = "tosa.const"() <{value = dense<[0, 2, 1, 3]> : tensor<4xi32>}> : () -> tensor<4xi32> + %2610 = tosa.transpose %2608, %2609 : (tensor<1x40x32x128xf32>, tensor<4xi32>) -> tensor<1x32x40x128xf32> + %2611 = tosa.reshape %2604 {new_shape = array} : (tensor<1x40x4096xf32>) -> tensor<1x40x32x128xf32> + %2612 = "tosa.const"() <{value = dense<[0, 2, 1, 3]> : tensor<4xi32>}> : () -> tensor<4xi32> + %2613 = tosa.transpose %2611, %2612 : (tensor<1x40x32x128xf32>, tensor<4xi32>) -> tensor<1x32x40x128xf32> + %2614 = tosa.reshape %45 {new_shape = array} : (tensor<1x40x128xf32>) -> tensor<1x1x40x128xf32> + %2615 = tosa.reshape %47 {new_shape = array} : (tensor<1x40x128xf32>) -> tensor<1x1x40x128xf32> + %2616 = tosa.mul %2607, %2614 {shift = 0 : i8} : (tensor<1x32x40x128xf32>, tensor<1x1x40x128xf32>) -> tensor<1x32x40x128xf32> + %extracted_slice_602 = tensor.extract_slice %2607[0, 0, 0, 0] [1, 32, 40, 64] [1, 1, 1, 1] : tensor<1x32x40x128xf32> to tensor<1x32x40x64xf32> + %extracted_slice_603 = tensor.extract_slice %2607[0, 0, 0, 64] [1, 32, 40, 64] [1, 1, 1, 1] : tensor<1x32x40x128xf32> to tensor<1x32x40x64xf32> + %2617 = tensor.empty() : tensor<1x32x40x64xf32> + %2618 = linalg.negf ins(%extracted_slice_603 : tensor<1x32x40x64xf32>) outs(%2617 : tensor<1x32x40x64xf32>) -> tensor<1x32x40x64xf32> + %2619 = tensor.empty() : tensor<1x32x40x128xf32> + %inserted_slice_604 = tensor.insert_slice %2618 into %2619[0, 0, 0, 0] [1, 32, 40, 64] [1, 1, 1, 1] : tensor<1x32x40x64xf32> into tensor<1x32x40x128xf32> + %inserted_slice_605 = tensor.insert_slice %extracted_slice_602 into %inserted_slice_604[0, 0, 0, 64] 
[1, 32, 40, 64] [1, 1, 1, 1] : tensor<1x32x40x64xf32> into tensor<1x32x40x128xf32> + %2620 = tosa.mul %inserted_slice_605, %2615 {shift = 0 : i8} : (tensor<1x32x40x128xf32>, tensor<1x1x40x128xf32>) -> tensor<1x32x40x128xf32> + %2621 = tosa.add %2616, %2620 : (tensor<1x32x40x128xf32>, tensor<1x32x40x128xf32>) -> tensor<1x32x40x128xf32> + %2622 = tosa.mul %2610, %2614 {shift = 0 : i8} : (tensor<1x32x40x128xf32>, tensor<1x1x40x128xf32>) -> tensor<1x32x40x128xf32> + %extracted_slice_606 = tensor.extract_slice %2610[0, 0, 0, 0] [1, 32, 40, 64] [1, 1, 1, 1] : tensor<1x32x40x128xf32> to tensor<1x32x40x64xf32> + %extracted_slice_607 = tensor.extract_slice %2610[0, 0, 0, 64] [1, 32, 40, 64] [1, 1, 1, 1] : tensor<1x32x40x128xf32> to tensor<1x32x40x64xf32> + %2623 = tensor.empty() : tensor<1x32x40x64xf32> + %2624 = linalg.negf ins(%extracted_slice_607 : tensor<1x32x40x64xf32>) outs(%2623 : tensor<1x32x40x64xf32>) -> tensor<1x32x40x64xf32> + %2625 = tensor.empty() : tensor<1x32x40x128xf32> + %inserted_slice_608 = tensor.insert_slice %2624 into %2625[0, 0, 0, 0] [1, 32, 40, 64] [1, 1, 1, 1] : tensor<1x32x40x64xf32> into tensor<1x32x40x128xf32> + %inserted_slice_609 = tensor.insert_slice %extracted_slice_606 into %inserted_slice_608[0, 0, 0, 64] [1, 32, 40, 64] [1, 1, 1, 1] : tensor<1x32x40x64xf32> into tensor<1x32x40x128xf32> + %2626 = tosa.mul %inserted_slice_609, %2615 {shift = 0 : i8} : (tensor<1x32x40x128xf32>, tensor<1x1x40x128xf32>) -> tensor<1x32x40x128xf32> + %2627 = tosa.add %2622, %2626 : (tensor<1x32x40x128xf32>, tensor<1x32x40x128xf32>) -> tensor<1x32x40x128xf32> + %2628 = tosa.reshape %19 {new_shape = array} : (tensor<40x41xf32>) -> tensor<1x40x41xf32> + %2629 = tosa.reshape %2628 {new_shape = array} : (tensor<1x40x41xf32>) -> tensor<1x1x40x41xf32> + %extracted_slice_610 = tensor.extract_slice %2629[0, 0, 0, 0] [1, 1, 40, 41] [1, 1, 1, 1] : tensor<1x1x40x41xf32> to tensor<1x1x40x41xf32> + %extracted_slice_611 = tensor.extract_slice %extracted_slice_610[0, 0, 0, 0] 
[1, 1, 40, 41] [1, 1, 1, 1] : tensor<1x1x40x41xf32> to tensor<1x1x40x41xf32> + %2630 = "tosa.const"() <{value = dense<0.000000e+00> : tensor<1x1x40x41xf32>}> : () -> tensor<1x1x40x41xf32> + %2631 = tosa.add %extracted_slice_611, %2630 : (tensor<1x1x40x41xf32>, tensor<1x1x40x41xf32>) -> tensor<1x1x40x41xf32> + %extracted_slice_612 = tensor.extract_slice %2631[0, 0, 0, 0] [1, 1, 40, 41] [1, 1, 1, 1] : tensor<1x1x40x41xf32> to tensor<1x1x40x41xf32> + %extracted_slice_613 = tensor.extract_slice %extracted_slice_612[0, 0, 0, 0] [1, 1, 40, 41] [1, 1, 1, 1] : tensor<1x1x40x41xf32> to tensor<1x1x40x41xf32> + %extracted_slice_614 = tensor.extract_slice %extracted_slice_613[0, 0, 0, 0] [1, 1, 40, 41] [1, 1, 1, 1] : tensor<1x1x40x41xf32> to tensor<1x1x40x41xf32> + %extracted_slice_615 = tensor.extract_slice %extracted_slice_614[0, 0, 0, 0] [1, 1, 40, 40] [1, 1, 1, 1] : tensor<1x1x40x41xf32> to tensor<1x1x40x40xf32> + %cst_616 = arith.constant 0.000000e+00 : f32 + %splat_617 = tensor.splat %cst_616 : tensor<40x40xf32> + %2632 = tosa.reshape %extracted_slice_615 {new_shape = array} : (tensor<1x1x40x40xf32>) -> tensor<40x40xf32> + %2633 = tosa.add %splat_617, %2632 : (tensor<40x40xf32>, tensor<40x40xf32>) -> tensor<40x40xf32> + %2634 = "tosa.const"() <{value = dense<[0, 1, 3, 2]> : tensor<4xi32>}> : () -> tensor<4xi32> + %2635 = tosa.transpose %2627, %2634 : (tensor<1x32x40x128xf32>, tensor<4xi32>) -> tensor<1x32x128x40xf32> + %2636 = tosa.reshape %2621 {new_shape = array} : (tensor<1x32x40x128xf32>) -> tensor<32x40x128xf32> + %2637 = tosa.reshape %2635 {new_shape = array} : (tensor<1x32x128x40xf32>) -> tensor<32x128x40xf32> + %2638 = tosa.matmul %2636, %2637 : (tensor<32x40x128xf32>, tensor<32x128x40xf32>) -> tensor<32x40x40xf32> + %cst_618 = arith.constant 0.0883883461 : f32 + %splat_619 = tensor.splat %cst_618 : tensor<32x40x40xf32> + %2639 = tosa.mul %2638, %splat_619 {shift = 0 : i8} : (tensor<32x40x40xf32>, tensor<32x40x40xf32>) -> tensor<32x40x40xf32> + %2640 = tosa.add 
%2639, %2633 : (tensor<32x40x40xf32>, tensor<40x40xf32>) -> tensor<32x40x40xf32> + %2641 = tosa.reduce_max %2640 {axis = 2 : i32} : (tensor<32x40x40xf32>) -> tensor<32x40x1xf32> + %2642 = tosa.sub %2640, %2641 : (tensor<32x40x40xf32>, tensor<32x40x1xf32>) -> tensor<32x40x40xf32> + %2643 = math.exp %2642 : tensor<32x40x40xf32> + %2644 = tosa.reduce_sum %2643 {axis = 2 : i32} : (tensor<32x40x40xf32>) -> tensor<32x40x1xf32> + %2645 = tosa.log %2644 : (tensor<32x40x1xf32>) -> tensor<32x40x1xf32> + %2646 = tosa.add %2641, %2645 : (tensor<32x40x1xf32>, tensor<32x40x1xf32>) -> tensor<32x40x1xf32> + %2647 = tosa.sub %2640, %2646 : (tensor<32x40x40xf32>, tensor<32x40x1xf32>) -> tensor<32x40x40xf32> + %2648 = math.exp %2647 : tensor<32x40x40xf32> + %2649 = tosa.reshape %2646 {new_shape = array} : (tensor<32x40x1xf32>) -> tensor<1x32x40xf32> + %2650 = tosa.reshape %2613 {new_shape = array} : (tensor<1x32x40x128xf32>) -> tensor<32x40x128xf32> + %2651 = tosa.matmul %2648, %2650 : (tensor<32x40x40xf32>, tensor<32x40x128xf32>) -> tensor<32x40x128xf32> + %2652 = tosa.reshape %2651 {new_shape = array} : (tensor<32x40x128xf32>) -> tensor<1x32x40x128xf32> + %2653 = "tosa.const"() <{value = dense<[0, 2, 1, 3]> : tensor<4xi32>}> : () -> tensor<4xi32> + %2654 = tosa.transpose %2652, %2653 : (tensor<1x32x40x128xf32>, tensor<4xi32>) -> tensor<1x40x32x128xf32> + %2655 = tosa.reshape %2654 {new_shape = array} : (tensor<1x40x32x128xf32>) -> tensor<1x40x4096xf32> + %2656 = "tosa.const"() <{value = dense<[1, 0]> : tensor<2xi32>}> : () -> tensor<2xi32> + %2657 = tosa.transpose %arg205, %2656 : (tensor<4096x4096xf32>, tensor<2xi32>) -> tensor<4096x4096xf32> + %2658 = tosa.reshape %2655 {new_shape = array} : (tensor<1x40x4096xf32>) -> tensor<40x4096xf32> + %cst_620 = arith.constant dense<0.000000e+00> : tensor<40x4096xf32> + %2659 = linalg.matmul {cast = #linalg.type_fn} ins(%2658, %2657 : tensor<40x4096xf32>, tensor<4096x4096xf32>) outs(%cst_620 : tensor<40x4096xf32>) -> tensor<40x4096xf32> + 
%2660 = tosa.reshape %2659 {new_shape = array} : (tensor<40x4096xf32>) -> tensor<1x40x4096xf32> + %2661 = tosa.add %2577, %2660 : (tensor<1x40x4096xf32>, tensor<1x40x4096xf32>) -> tensor<1x40x4096xf32> + %2662 = tensor.empty() : tensor<1x40x4096xf32> + %c2_i32_621 = arith.constant 2 : i32 + %2663 = linalg.generic {indexing_maps = [#map4, #map4], iterator_types = ["parallel", "parallel", "parallel"]} ins(%2661 : tensor<1x40x4096xf32>) outs(%2662 : tensor<1x40x4096xf32>) { + ^bb0(%in: f32, %out: f32): + %3745 = math.fpowi %in, %c2_i32_621 : f32, i32 + linalg.yield %3745 : f32 + } -> tensor<1x40x4096xf32> + %2664 = tosa.reduce_sum %2663 {axis = 2 : i32} : (tensor<1x40x4096xf32>) -> tensor<1x40x1xf32> + %2665 = "tosa.const"() <{value = dense<4.096000e+03> : tensor<1xf32>}> : () -> tensor<1xf32> + %2666 = tosa.reciprocal %2665 : (tensor<1xf32>) -> tensor<1xf32> + %2667 = tosa.mul %2666, %2664 {shift = 0 : i8} : (tensor<1xf32>, tensor<1x40x1xf32>) -> tensor<1x40x1xf32> + %2668 = "tosa.const"() <{value = dense<9.99999974E-6> : tensor<1x40x1xf32>}> : () -> tensor<1x40x1xf32> + %2669 = tosa.add %2667, %2668 : (tensor<1x40x1xf32>, tensor<1x40x1xf32>) -> tensor<1x40x1xf32> + %2670 = tosa.rsqrt %2669 : (tensor<1x40x1xf32>) -> tensor<1x40x1xf32> + %2671 = tosa.mul %2661, %2670 {shift = 0 : i8} : (tensor<1x40x4096xf32>, tensor<1x40x1xf32>) -> tensor<1x40x4096xf32> + %2672 = tosa.reshape %arg206 {new_shape = array} : (tensor<4096xf32>) -> tensor<1x1x4096xf32> + %2673 = tosa.mul %2672, %2671 {shift = 0 : i8} : (tensor<1x1x4096xf32>, tensor<1x40x4096xf32>) -> tensor<1x40x4096xf32> + %2674 = "tosa.const"() <{value = dense<[1, 0]> : tensor<2xi32>}> : () -> tensor<2xi32> + %2675 = tosa.transpose %arg207, %2674 : (tensor<11008x4096xf32>, tensor<2xi32>) -> tensor<4096x11008xf32> + %2676 = tosa.reshape %2673 {new_shape = array} : (tensor<1x40x4096xf32>) -> tensor<40x4096xf32> + %cst_622 = arith.constant dense<0.000000e+00> : tensor<40x11008xf32> + %2677 = linalg.matmul {cast = 
#linalg.type_fn} ins(%2676, %2675 : tensor<40x4096xf32>, tensor<4096x11008xf32>) outs(%cst_622 : tensor<40x11008xf32>) -> tensor<40x11008xf32> + %2678 = tosa.reshape %2677 {new_shape = array} : (tensor<40x11008xf32>) -> tensor<1x40x11008xf32> + %2679 = tosa.sigmoid %2678 : (tensor<1x40x11008xf32>) -> tensor<1x40x11008xf32> + %2680 = tosa.mul %2678, %2679 {shift = 0 : i8} : (tensor<1x40x11008xf32>, tensor<1x40x11008xf32>) -> tensor<1x40x11008xf32> + %2681 = "tosa.const"() <{value = dense<[1, 0]> : tensor<2xi32>}> : () -> tensor<2xi32> + %2682 = tosa.transpose %arg208, %2681 : (tensor<11008x4096xf32>, tensor<2xi32>) -> tensor<4096x11008xf32> + %2683 = tosa.reshape %2673 {new_shape = array} : (tensor<1x40x4096xf32>) -> tensor<40x4096xf32> + %cst_623 = arith.constant dense<0.000000e+00> : tensor<40x11008xf32> + %2684 = linalg.matmul {cast = #linalg.type_fn} ins(%2683, %2682 : tensor<40x4096xf32>, tensor<4096x11008xf32>) outs(%cst_623 : tensor<40x11008xf32>) -> tensor<40x11008xf32> + %2685 = tosa.reshape %2684 {new_shape = array} : (tensor<40x11008xf32>) -> tensor<1x40x11008xf32> + %2686 = tosa.mul %2680, %2685 {shift = 0 : i8} : (tensor<1x40x11008xf32>, tensor<1x40x11008xf32>) -> tensor<1x40x11008xf32> + %2687 = "tosa.const"() <{value = dense<[1, 0]> : tensor<2xi32>}> : () -> tensor<2xi32> + %2688 = tosa.transpose %arg209, %2687 : (tensor<4096x11008xf32>, tensor<2xi32>) -> tensor<11008x4096xf32> + %2689 = tosa.reshape %2686 {new_shape = array} : (tensor<1x40x11008xf32>) -> tensor<40x11008xf32> + %cst_624 = arith.constant dense<0.000000e+00> : tensor<40x4096xf32> + %2690 = linalg.matmul {cast = #linalg.type_fn} ins(%2689, %2688 : tensor<40x11008xf32>, tensor<11008x4096xf32>) outs(%cst_624 : tensor<40x4096xf32>) -> tensor<40x4096xf32> + %2691 = tosa.reshape %2690 {new_shape = array} : (tensor<40x4096xf32>) -> tensor<1x40x4096xf32> + %2692 = tosa.add %2661, %2691 : (tensor<1x40x4096xf32>, tensor<1x40x4096xf32>) -> tensor<1x40x4096xf32> + %2693 = tensor.empty() : 
tensor<1x40x4096xf32> + %c2_i32_625 = arith.constant 2 : i32 + %2694 = linalg.generic {indexing_maps = [#map4, #map4], iterator_types = ["parallel", "parallel", "parallel"]} ins(%2692 : tensor<1x40x4096xf32>) outs(%2693 : tensor<1x40x4096xf32>) { + ^bb0(%in: f32, %out: f32): + %3745 = math.fpowi %in, %c2_i32_625 : f32, i32 + linalg.yield %3745 : f32 + } -> tensor<1x40x4096xf32> + %2695 = tosa.reduce_sum %2694 {axis = 2 : i32} : (tensor<1x40x4096xf32>) -> tensor<1x40x1xf32> + %2696 = "tosa.const"() <{value = dense<4.096000e+03> : tensor<1xf32>}> : () -> tensor<1xf32> + %2697 = tosa.reciprocal %2696 : (tensor<1xf32>) -> tensor<1xf32> + %2698 = tosa.mul %2697, %2695 {shift = 0 : i8} : (tensor<1xf32>, tensor<1x40x1xf32>) -> tensor<1x40x1xf32> + %2699 = "tosa.const"() <{value = dense<9.99999974E-6> : tensor<1x40x1xf32>}> : () -> tensor<1x40x1xf32> + %2700 = tosa.add %2698, %2699 : (tensor<1x40x1xf32>, tensor<1x40x1xf32>) -> tensor<1x40x1xf32> + %2701 = tosa.rsqrt %2700 : (tensor<1x40x1xf32>) -> tensor<1x40x1xf32> + %2702 = tosa.mul %2692, %2701 {shift = 0 : i8} : (tensor<1x40x4096xf32>, tensor<1x40x1xf32>) -> tensor<1x40x4096xf32> + %2703 = tosa.reshape %arg210 {new_shape = array} : (tensor<4096xf32>) -> tensor<1x1x4096xf32> + %2704 = tosa.mul %2703, %2702 {shift = 0 : i8} : (tensor<1x1x4096xf32>, tensor<1x40x4096xf32>) -> tensor<1x40x4096xf32> + %2705 = "tosa.const"() <{value = dense<[1, 0]> : tensor<2xi32>}> : () -> tensor<2xi32> + %2706 = tosa.transpose %arg211, %2705 : (tensor<4096x4096xf32>, tensor<2xi32>) -> tensor<4096x4096xf32> + %2707 = tosa.reshape %2704 {new_shape = array} : (tensor<1x40x4096xf32>) -> tensor<40x4096xf32> + %cst_626 = arith.constant dense<0.000000e+00> : tensor<40x4096xf32> + %2708 = linalg.matmul {cast = #linalg.type_fn} ins(%2707, %2706 : tensor<40x4096xf32>, tensor<4096x4096xf32>) outs(%cst_626 : tensor<40x4096xf32>) -> tensor<40x4096xf32> + %2709 = tosa.reshape %2708 {new_shape = array} : (tensor<40x4096xf32>) -> tensor<1x40x4096xf32> + 
%2710 = "tosa.const"() <{value = dense<[1, 0]> : tensor<2xi32>}> : () -> tensor<2xi32> + %2711 = tosa.transpose %arg212, %2710 : (tensor<4096x4096xf32>, tensor<2xi32>) -> tensor<4096x4096xf32> + %2712 = tosa.reshape %2704 {new_shape = array} : (tensor<1x40x4096xf32>) -> tensor<40x4096xf32> + %cst_627 = arith.constant dense<0.000000e+00> : tensor<40x4096xf32> + %2713 = linalg.matmul {cast = #linalg.type_fn} ins(%2712, %2711 : tensor<40x4096xf32>, tensor<4096x4096xf32>) outs(%cst_627 : tensor<40x4096xf32>) -> tensor<40x4096xf32> + %2714 = tosa.reshape %2713 {new_shape = array} : (tensor<40x4096xf32>) -> tensor<1x40x4096xf32> + %2715 = "tosa.const"() <{value = dense<[1, 0]> : tensor<2xi32>}> : () -> tensor<2xi32> + %2716 = tosa.transpose %arg213, %2715 : (tensor<4096x4096xf32>, tensor<2xi32>) -> tensor<4096x4096xf32> + %2717 = tosa.reshape %2704 {new_shape = array} : (tensor<1x40x4096xf32>) -> tensor<40x4096xf32> + %cst_628 = arith.constant dense<0.000000e+00> : tensor<40x4096xf32> + %2718 = linalg.matmul {cast = #linalg.type_fn} ins(%2717, %2716 : tensor<40x4096xf32>, tensor<4096x4096xf32>) outs(%cst_628 : tensor<40x4096xf32>) -> tensor<40x4096xf32> + %2719 = tosa.reshape %2718 {new_shape = array} : (tensor<40x4096xf32>) -> tensor<1x40x4096xf32> + %2720 = tosa.reshape %2709 {new_shape = array} : (tensor<1x40x4096xf32>) -> tensor<1x40x32x128xf32> + %2721 = "tosa.const"() <{value = dense<[0, 2, 1, 3]> : tensor<4xi32>}> : () -> tensor<4xi32> + %2722 = tosa.transpose %2720, %2721 : (tensor<1x40x32x128xf32>, tensor<4xi32>) -> tensor<1x32x40x128xf32> + %2723 = tosa.reshape %2714 {new_shape = array} : (tensor<1x40x4096xf32>) -> tensor<1x40x32x128xf32> + %2724 = "tosa.const"() <{value = dense<[0, 2, 1, 3]> : tensor<4xi32>}> : () -> tensor<4xi32> + %2725 = tosa.transpose %2723, %2724 : (tensor<1x40x32x128xf32>, tensor<4xi32>) -> tensor<1x32x40x128xf32> + %2726 = tosa.reshape %2719 {new_shape = array} : (tensor<1x40x4096xf32>) -> tensor<1x40x32x128xf32> + %2727 = 
"tosa.const"() <{value = dense<[0, 2, 1, 3]> : tensor<4xi32>}> : () -> tensor<4xi32> + %2728 = tosa.transpose %2726, %2727 : (tensor<1x40x32x128xf32>, tensor<4xi32>) -> tensor<1x32x40x128xf32> + %2729 = tosa.reshape %45 {new_shape = array} : (tensor<1x40x128xf32>) -> tensor<1x1x40x128xf32> + %2730 = tosa.reshape %47 {new_shape = array} : (tensor<1x40x128xf32>) -> tensor<1x1x40x128xf32> + %2731 = tosa.mul %2722, %2729 {shift = 0 : i8} : (tensor<1x32x40x128xf32>, tensor<1x1x40x128xf32>) -> tensor<1x32x40x128xf32> + %extracted_slice_629 = tensor.extract_slice %2722[0, 0, 0, 0] [1, 32, 40, 64] [1, 1, 1, 1] : tensor<1x32x40x128xf32> to tensor<1x32x40x64xf32> + %extracted_slice_630 = tensor.extract_slice %2722[0, 0, 0, 64] [1, 32, 40, 64] [1, 1, 1, 1] : tensor<1x32x40x128xf32> to tensor<1x32x40x64xf32> + %2732 = tensor.empty() : tensor<1x32x40x64xf32> + %2733 = linalg.negf ins(%extracted_slice_630 : tensor<1x32x40x64xf32>) outs(%2732 : tensor<1x32x40x64xf32>) -> tensor<1x32x40x64xf32> + %2734 = tensor.empty() : tensor<1x32x40x128xf32> + %inserted_slice_631 = tensor.insert_slice %2733 into %2734[0, 0, 0, 0] [1, 32, 40, 64] [1, 1, 1, 1] : tensor<1x32x40x64xf32> into tensor<1x32x40x128xf32> + %inserted_slice_632 = tensor.insert_slice %extracted_slice_629 into %inserted_slice_631[0, 0, 0, 64] [1, 32, 40, 64] [1, 1, 1, 1] : tensor<1x32x40x64xf32> into tensor<1x32x40x128xf32> + %2735 = tosa.mul %inserted_slice_632, %2730 {shift = 0 : i8} : (tensor<1x32x40x128xf32>, tensor<1x1x40x128xf32>) -> tensor<1x32x40x128xf32> + %2736 = tosa.add %2731, %2735 : (tensor<1x32x40x128xf32>, tensor<1x32x40x128xf32>) -> tensor<1x32x40x128xf32> + %2737 = tosa.mul %2725, %2729 {shift = 0 : i8} : (tensor<1x32x40x128xf32>, tensor<1x1x40x128xf32>) -> tensor<1x32x40x128xf32> + %extracted_slice_633 = tensor.extract_slice %2725[0, 0, 0, 0] [1, 32, 40, 64] [1, 1, 1, 1] : tensor<1x32x40x128xf32> to tensor<1x32x40x64xf32> + %extracted_slice_634 = tensor.extract_slice %2725[0, 0, 0, 64] [1, 32, 40, 64] [1, 
1, 1, 1] : tensor<1x32x40x128xf32> to tensor<1x32x40x64xf32> + %2738 = tensor.empty() : tensor<1x32x40x64xf32> + %2739 = linalg.negf ins(%extracted_slice_634 : tensor<1x32x40x64xf32>) outs(%2738 : tensor<1x32x40x64xf32>) -> tensor<1x32x40x64xf32> + %2740 = tensor.empty() : tensor<1x32x40x128xf32> + %inserted_slice_635 = tensor.insert_slice %2739 into %2740[0, 0, 0, 0] [1, 32, 40, 64] [1, 1, 1, 1] : tensor<1x32x40x64xf32> into tensor<1x32x40x128xf32> + %inserted_slice_636 = tensor.insert_slice %extracted_slice_633 into %inserted_slice_635[0, 0, 0, 64] [1, 32, 40, 64] [1, 1, 1, 1] : tensor<1x32x40x64xf32> into tensor<1x32x40x128xf32> + %2741 = tosa.mul %inserted_slice_636, %2730 {shift = 0 : i8} : (tensor<1x32x40x128xf32>, tensor<1x1x40x128xf32>) -> tensor<1x32x40x128xf32> + %2742 = tosa.add %2737, %2741 : (tensor<1x32x40x128xf32>, tensor<1x32x40x128xf32>) -> tensor<1x32x40x128xf32> + %2743 = tosa.reshape %19 {new_shape = array} : (tensor<40x41xf32>) -> tensor<1x40x41xf32> + %2744 = tosa.reshape %2743 {new_shape = array} : (tensor<1x40x41xf32>) -> tensor<1x1x40x41xf32> + %extracted_slice_637 = tensor.extract_slice %2744[0, 0, 0, 0] [1, 1, 40, 41] [1, 1, 1, 1] : tensor<1x1x40x41xf32> to tensor<1x1x40x41xf32> + %extracted_slice_638 = tensor.extract_slice %extracted_slice_637[0, 0, 0, 0] [1, 1, 40, 41] [1, 1, 1, 1] : tensor<1x1x40x41xf32> to tensor<1x1x40x41xf32> + %2745 = "tosa.const"() <{value = dense<0.000000e+00> : tensor<1x1x40x41xf32>}> : () -> tensor<1x1x40x41xf32> + %2746 = tosa.add %extracted_slice_638, %2745 : (tensor<1x1x40x41xf32>, tensor<1x1x40x41xf32>) -> tensor<1x1x40x41xf32> + %extracted_slice_639 = tensor.extract_slice %2746[0, 0, 0, 0] [1, 1, 40, 41] [1, 1, 1, 1] : tensor<1x1x40x41xf32> to tensor<1x1x40x41xf32> + %extracted_slice_640 = tensor.extract_slice %extracted_slice_639[0, 0, 0, 0] [1, 1, 40, 41] [1, 1, 1, 1] : tensor<1x1x40x41xf32> to tensor<1x1x40x41xf32> + %extracted_slice_641 = tensor.extract_slice %extracted_slice_640[0, 0, 0, 0] [1, 1, 40, 
41] [1, 1, 1, 1] : tensor<1x1x40x41xf32> to tensor<1x1x40x41xf32> + %extracted_slice_642 = tensor.extract_slice %extracted_slice_641[0, 0, 0, 0] [1, 1, 40, 40] [1, 1, 1, 1] : tensor<1x1x40x41xf32> to tensor<1x1x40x40xf32> + %cst_643 = arith.constant 0.000000e+00 : f32 + %splat_644 = tensor.splat %cst_643 : tensor<40x40xf32> + %2747 = tosa.reshape %extracted_slice_642 {new_shape = array} : (tensor<1x1x40x40xf32>) -> tensor<40x40xf32> + %2748 = tosa.add %splat_644, %2747 : (tensor<40x40xf32>, tensor<40x40xf32>) -> tensor<40x40xf32> + %2749 = "tosa.const"() <{value = dense<[0, 1, 3, 2]> : tensor<4xi32>}> : () -> tensor<4xi32> + %2750 = tosa.transpose %2742, %2749 : (tensor<1x32x40x128xf32>, tensor<4xi32>) -> tensor<1x32x128x40xf32> + %2751 = tosa.reshape %2736 {new_shape = array} : (tensor<1x32x40x128xf32>) -> tensor<32x40x128xf32> + %2752 = tosa.reshape %2750 {new_shape = array} : (tensor<1x32x128x40xf32>) -> tensor<32x128x40xf32> + %2753 = tosa.matmul %2751, %2752 : (tensor<32x40x128xf32>, tensor<32x128x40xf32>) -> tensor<32x40x40xf32> + %cst_645 = arith.constant 0.0883883461 : f32 + %splat_646 = tensor.splat %cst_645 : tensor<32x40x40xf32> + %2754 = tosa.mul %2753, %splat_646 {shift = 0 : i8} : (tensor<32x40x40xf32>, tensor<32x40x40xf32>) -> tensor<32x40x40xf32> + %2755 = tosa.add %2754, %2748 : (tensor<32x40x40xf32>, tensor<40x40xf32>) -> tensor<32x40x40xf32> + %2756 = tosa.reduce_max %2755 {axis = 2 : i32} : (tensor<32x40x40xf32>) -> tensor<32x40x1xf32> + %2757 = tosa.sub %2755, %2756 : (tensor<32x40x40xf32>, tensor<32x40x1xf32>) -> tensor<32x40x40xf32> + %2758 = math.exp %2757 : tensor<32x40x40xf32> + %2759 = tosa.reduce_sum %2758 {axis = 2 : i32} : (tensor<32x40x40xf32>) -> tensor<32x40x1xf32> + %2760 = tosa.log %2759 : (tensor<32x40x1xf32>) -> tensor<32x40x1xf32> + %2761 = tosa.add %2756, %2760 : (tensor<32x40x1xf32>, tensor<32x40x1xf32>) -> tensor<32x40x1xf32> + %2762 = tosa.sub %2755, %2761 : (tensor<32x40x40xf32>, tensor<32x40x1xf32>) -> 
tensor<32x40x40xf32> + %2763 = math.exp %2762 : tensor<32x40x40xf32> + %2764 = tosa.reshape %2761 {new_shape = array} : (tensor<32x40x1xf32>) -> tensor<1x32x40xf32> + %2765 = tosa.reshape %2728 {new_shape = array} : (tensor<1x32x40x128xf32>) -> tensor<32x40x128xf32> + %2766 = tosa.matmul %2763, %2765 : (tensor<32x40x40xf32>, tensor<32x40x128xf32>) -> tensor<32x40x128xf32> + %2767 = tosa.reshape %2766 {new_shape = array} : (tensor<32x40x128xf32>) -> tensor<1x32x40x128xf32> + %2768 = "tosa.const"() <{value = dense<[0, 2, 1, 3]> : tensor<4xi32>}> : () -> tensor<4xi32> + %2769 = tosa.transpose %2767, %2768 : (tensor<1x32x40x128xf32>, tensor<4xi32>) -> tensor<1x40x32x128xf32> + %2770 = tosa.reshape %2769 {new_shape = array} : (tensor<1x40x32x128xf32>) -> tensor<1x40x4096xf32> + %2771 = "tosa.const"() <{value = dense<[1, 0]> : tensor<2xi32>}> : () -> tensor<2xi32> + %2772 = tosa.transpose %arg214, %2771 : (tensor<4096x4096xf32>, tensor<2xi32>) -> tensor<4096x4096xf32> + %2773 = tosa.reshape %2770 {new_shape = array} : (tensor<1x40x4096xf32>) -> tensor<40x4096xf32> + %cst_647 = arith.constant dense<0.000000e+00> : tensor<40x4096xf32> + %2774 = linalg.matmul {cast = #linalg.type_fn} ins(%2773, %2772 : tensor<40x4096xf32>, tensor<4096x4096xf32>) outs(%cst_647 : tensor<40x4096xf32>) -> tensor<40x4096xf32> + %2775 = tosa.reshape %2774 {new_shape = array} : (tensor<40x4096xf32>) -> tensor<1x40x4096xf32> + %2776 = tosa.add %2692, %2775 : (tensor<1x40x4096xf32>, tensor<1x40x4096xf32>) -> tensor<1x40x4096xf32> + %2777 = tensor.empty() : tensor<1x40x4096xf32> + %c2_i32_648 = arith.constant 2 : i32 + %2778 = linalg.generic {indexing_maps = [#map4, #map4], iterator_types = ["parallel", "parallel", "parallel"]} ins(%2776 : tensor<1x40x4096xf32>) outs(%2777 : tensor<1x40x4096xf32>) { + ^bb0(%in: f32, %out: f32): + %3745 = math.fpowi %in, %c2_i32_648 : f32, i32 + linalg.yield %3745 : f32 + } -> tensor<1x40x4096xf32> + %2779 = tosa.reduce_sum %2778 {axis = 2 : i32} : 
(tensor<1x40x4096xf32>) -> tensor<1x40x1xf32> + %2780 = "tosa.const"() <{value = dense<4.096000e+03> : tensor<1xf32>}> : () -> tensor<1xf32> + %2781 = tosa.reciprocal %2780 : (tensor<1xf32>) -> tensor<1xf32> + %2782 = tosa.mul %2781, %2779 {shift = 0 : i8} : (tensor<1xf32>, tensor<1x40x1xf32>) -> tensor<1x40x1xf32> + %2783 = "tosa.const"() <{value = dense<9.99999974E-6> : tensor<1x40x1xf32>}> : () -> tensor<1x40x1xf32> + %2784 = tosa.add %2782, %2783 : (tensor<1x40x1xf32>, tensor<1x40x1xf32>) -> tensor<1x40x1xf32> + %2785 = tosa.rsqrt %2784 : (tensor<1x40x1xf32>) -> tensor<1x40x1xf32> + %2786 = tosa.mul %2776, %2785 {shift = 0 : i8} : (tensor<1x40x4096xf32>, tensor<1x40x1xf32>) -> tensor<1x40x4096xf32> + %2787 = tosa.reshape %arg215 {new_shape = array} : (tensor<4096xf32>) -> tensor<1x1x4096xf32> + %2788 = tosa.mul %2787, %2786 {shift = 0 : i8} : (tensor<1x1x4096xf32>, tensor<1x40x4096xf32>) -> tensor<1x40x4096xf32> + %2789 = "tosa.const"() <{value = dense<[1, 0]> : tensor<2xi32>}> : () -> tensor<2xi32> + %2790 = tosa.transpose %arg216, %2789 : (tensor<11008x4096xf32>, tensor<2xi32>) -> tensor<4096x11008xf32> + %2791 = tosa.reshape %2788 {new_shape = array} : (tensor<1x40x4096xf32>) -> tensor<40x4096xf32> + %cst_649 = arith.constant dense<0.000000e+00> : tensor<40x11008xf32> + %2792 = linalg.matmul {cast = #linalg.type_fn} ins(%2791, %2790 : tensor<40x4096xf32>, tensor<4096x11008xf32>) outs(%cst_649 : tensor<40x11008xf32>) -> tensor<40x11008xf32> + %2793 = tosa.reshape %2792 {new_shape = array} : (tensor<40x11008xf32>) -> tensor<1x40x11008xf32> + %2794 = tosa.sigmoid %2793 : (tensor<1x40x11008xf32>) -> tensor<1x40x11008xf32> + %2795 = tosa.mul %2793, %2794 {shift = 0 : i8} : (tensor<1x40x11008xf32>, tensor<1x40x11008xf32>) -> tensor<1x40x11008xf32> + %2796 = "tosa.const"() <{value = dense<[1, 0]> : tensor<2xi32>}> : () -> tensor<2xi32> + %2797 = tosa.transpose %arg217, %2796 : (tensor<11008x4096xf32>, tensor<2xi32>) -> tensor<4096x11008xf32> + %2798 = tosa.reshape 
%2788 {new_shape = array} : (tensor<1x40x4096xf32>) -> tensor<40x4096xf32> + %cst_650 = arith.constant dense<0.000000e+00> : tensor<40x11008xf32> + %2799 = linalg.matmul {cast = #linalg.type_fn} ins(%2798, %2797 : tensor<40x4096xf32>, tensor<4096x11008xf32>) outs(%cst_650 : tensor<40x11008xf32>) -> tensor<40x11008xf32> + %2800 = tosa.reshape %2799 {new_shape = array} : (tensor<40x11008xf32>) -> tensor<1x40x11008xf32> + %2801 = tosa.mul %2795, %2800 {shift = 0 : i8} : (tensor<1x40x11008xf32>, tensor<1x40x11008xf32>) -> tensor<1x40x11008xf32> + %2802 = "tosa.const"() <{value = dense<[1, 0]> : tensor<2xi32>}> : () -> tensor<2xi32> + %2803 = tosa.transpose %arg218, %2802 : (tensor<4096x11008xf32>, tensor<2xi32>) -> tensor<11008x4096xf32> + %2804 = tosa.reshape %2801 {new_shape = array} : (tensor<1x40x11008xf32>) -> tensor<40x11008xf32> + %cst_651 = arith.constant dense<0.000000e+00> : tensor<40x4096xf32> + %2805 = linalg.matmul {cast = #linalg.type_fn} ins(%2804, %2803 : tensor<40x11008xf32>, tensor<11008x4096xf32>) outs(%cst_651 : tensor<40x4096xf32>) -> tensor<40x4096xf32> + %2806 = tosa.reshape %2805 {new_shape = array} : (tensor<40x4096xf32>) -> tensor<1x40x4096xf32> + %2807 = tosa.add %2776, %2806 : (tensor<1x40x4096xf32>, tensor<1x40x4096xf32>) -> tensor<1x40x4096xf32> + %2808 = tensor.empty() : tensor<1x40x4096xf32> + %c2_i32_652 = arith.constant 2 : i32 + %2809 = linalg.generic {indexing_maps = [#map4, #map4], iterator_types = ["parallel", "parallel", "parallel"]} ins(%2807 : tensor<1x40x4096xf32>) outs(%2808 : tensor<1x40x4096xf32>) { + ^bb0(%in: f32, %out: f32): + %3745 = math.fpowi %in, %c2_i32_652 : f32, i32 + linalg.yield %3745 : f32 + } -> tensor<1x40x4096xf32> + %2810 = tosa.reduce_sum %2809 {axis = 2 : i32} : (tensor<1x40x4096xf32>) -> tensor<1x40x1xf32> + %2811 = "tosa.const"() <{value = dense<4.096000e+03> : tensor<1xf32>}> : () -> tensor<1xf32> + %2812 = tosa.reciprocal %2811 : (tensor<1xf32>) -> tensor<1xf32> + %2813 = tosa.mul %2812, %2810 {shift = 
0 : i8} : (tensor<1xf32>, tensor<1x40x1xf32>) -> tensor<1x40x1xf32> + %2814 = "tosa.const"() <{value = dense<9.99999974E-6> : tensor<1x40x1xf32>}> : () -> tensor<1x40x1xf32> + %2815 = tosa.add %2813, %2814 : (tensor<1x40x1xf32>, tensor<1x40x1xf32>) -> tensor<1x40x1xf32> + %2816 = tosa.rsqrt %2815 : (tensor<1x40x1xf32>) -> tensor<1x40x1xf32> + %2817 = tosa.mul %2807, %2816 {shift = 0 : i8} : (tensor<1x40x4096xf32>, tensor<1x40x1xf32>) -> tensor<1x40x4096xf32> + %2818 = tosa.reshape %arg219 {new_shape = array} : (tensor<4096xf32>) -> tensor<1x1x4096xf32> + %2819 = tosa.mul %2818, %2817 {shift = 0 : i8} : (tensor<1x1x4096xf32>, tensor<1x40x4096xf32>) -> tensor<1x40x4096xf32> + %2820 = "tosa.const"() <{value = dense<[1, 0]> : tensor<2xi32>}> : () -> tensor<2xi32> + %2821 = tosa.transpose %arg220, %2820 : (tensor<4096x4096xf32>, tensor<2xi32>) -> tensor<4096x4096xf32> + %2822 = tosa.reshape %2819 {new_shape = array} : (tensor<1x40x4096xf32>) -> tensor<40x4096xf32> + %cst_653 = arith.constant dense<0.000000e+00> : tensor<40x4096xf32> + %2823 = linalg.matmul {cast = #linalg.type_fn} ins(%2822, %2821 : tensor<40x4096xf32>, tensor<4096x4096xf32>) outs(%cst_653 : tensor<40x4096xf32>) -> tensor<40x4096xf32> + %2824 = tosa.reshape %2823 {new_shape = array} : (tensor<40x4096xf32>) -> tensor<1x40x4096xf32> + %2825 = "tosa.const"() <{value = dense<[1, 0]> : tensor<2xi32>}> : () -> tensor<2xi32> + %2826 = tosa.transpose %arg221, %2825 : (tensor<4096x4096xf32>, tensor<2xi32>) -> tensor<4096x4096xf32> + %2827 = tosa.reshape %2819 {new_shape = array} : (tensor<1x40x4096xf32>) -> tensor<40x4096xf32> + %cst_654 = arith.constant dense<0.000000e+00> : tensor<40x4096xf32> + %2828 = linalg.matmul {cast = #linalg.type_fn} ins(%2827, %2826 : tensor<40x4096xf32>, tensor<4096x4096xf32>) outs(%cst_654 : tensor<40x4096xf32>) -> tensor<40x4096xf32> + %2829 = tosa.reshape %2828 {new_shape = array} : (tensor<40x4096xf32>) -> tensor<1x40x4096xf32> + %2830 = "tosa.const"() <{value = dense<[1, 0]> : 
tensor<2xi32>}> : () -> tensor<2xi32> + %2831 = tosa.transpose %arg222, %2830 : (tensor<4096x4096xf32>, tensor<2xi32>) -> tensor<4096x4096xf32> + %2832 = tosa.reshape %2819 {new_shape = array} : (tensor<1x40x4096xf32>) -> tensor<40x4096xf32> + %cst_655 = arith.constant dense<0.000000e+00> : tensor<40x4096xf32> + %2833 = linalg.matmul {cast = #linalg.type_fn} ins(%2832, %2831 : tensor<40x4096xf32>, tensor<4096x4096xf32>) outs(%cst_655 : tensor<40x4096xf32>) -> tensor<40x4096xf32> + %2834 = tosa.reshape %2833 {new_shape = array} : (tensor<40x4096xf32>) -> tensor<1x40x4096xf32> + %2835 = tosa.reshape %2824 {new_shape = array} : (tensor<1x40x4096xf32>) -> tensor<1x40x32x128xf32> + %2836 = "tosa.const"() <{value = dense<[0, 2, 1, 3]> : tensor<4xi32>}> : () -> tensor<4xi32> + %2837 = tosa.transpose %2835, %2836 : (tensor<1x40x32x128xf32>, tensor<4xi32>) -> tensor<1x32x40x128xf32> + %2838 = tosa.reshape %2829 {new_shape = array} : (tensor<1x40x4096xf32>) -> tensor<1x40x32x128xf32> + %2839 = "tosa.const"() <{value = dense<[0, 2, 1, 3]> : tensor<4xi32>}> : () -> tensor<4xi32> + %2840 = tosa.transpose %2838, %2839 : (tensor<1x40x32x128xf32>, tensor<4xi32>) -> tensor<1x32x40x128xf32> + %2841 = tosa.reshape %2834 {new_shape = array} : (tensor<1x40x4096xf32>) -> tensor<1x40x32x128xf32> + %2842 = "tosa.const"() <{value = dense<[0, 2, 1, 3]> : tensor<4xi32>}> : () -> tensor<4xi32> + %2843 = tosa.transpose %2841, %2842 : (tensor<1x40x32x128xf32>, tensor<4xi32>) -> tensor<1x32x40x128xf32> + %2844 = tosa.reshape %45 {new_shape = array} : (tensor<1x40x128xf32>) -> tensor<1x1x40x128xf32> + %2845 = tosa.reshape %47 {new_shape = array} : (tensor<1x40x128xf32>) -> tensor<1x1x40x128xf32> + %2846 = tosa.mul %2837, %2844 {shift = 0 : i8} : (tensor<1x32x40x128xf32>, tensor<1x1x40x128xf32>) -> tensor<1x32x40x128xf32> + %extracted_slice_656 = tensor.extract_slice %2837[0, 0, 0, 0] [1, 32, 40, 64] [1, 1, 1, 1] : tensor<1x32x40x128xf32> to tensor<1x32x40x64xf32> + %extracted_slice_657 = 
tensor.extract_slice %2837[0, 0, 0, 64] [1, 32, 40, 64] [1, 1, 1, 1] : tensor<1x32x40x128xf32> to tensor<1x32x40x64xf32> + %2847 = tensor.empty() : tensor<1x32x40x64xf32> + %2848 = linalg.negf ins(%extracted_slice_657 : tensor<1x32x40x64xf32>) outs(%2847 : tensor<1x32x40x64xf32>) -> tensor<1x32x40x64xf32> + %2849 = tensor.empty() : tensor<1x32x40x128xf32> + %inserted_slice_658 = tensor.insert_slice %2848 into %2849[0, 0, 0, 0] [1, 32, 40, 64] [1, 1, 1, 1] : tensor<1x32x40x64xf32> into tensor<1x32x40x128xf32> + %inserted_slice_659 = tensor.insert_slice %extracted_slice_656 into %inserted_slice_658[0, 0, 0, 64] [1, 32, 40, 64] [1, 1, 1, 1] : tensor<1x32x40x64xf32> into tensor<1x32x40x128xf32> + %2850 = tosa.mul %inserted_slice_659, %2845 {shift = 0 : i8} : (tensor<1x32x40x128xf32>, tensor<1x1x40x128xf32>) -> tensor<1x32x40x128xf32> + %2851 = tosa.add %2846, %2850 : (tensor<1x32x40x128xf32>, tensor<1x32x40x128xf32>) -> tensor<1x32x40x128xf32> + %2852 = tosa.mul %2840, %2844 {shift = 0 : i8} : (tensor<1x32x40x128xf32>, tensor<1x1x40x128xf32>) -> tensor<1x32x40x128xf32> + %extracted_slice_660 = tensor.extract_slice %2840[0, 0, 0, 0] [1, 32, 40, 64] [1, 1, 1, 1] : tensor<1x32x40x128xf32> to tensor<1x32x40x64xf32> + %extracted_slice_661 = tensor.extract_slice %2840[0, 0, 0, 64] [1, 32, 40, 64] [1, 1, 1, 1] : tensor<1x32x40x128xf32> to tensor<1x32x40x64xf32> + %2853 = tensor.empty() : tensor<1x32x40x64xf32> + %2854 = linalg.negf ins(%extracted_slice_661 : tensor<1x32x40x64xf32>) outs(%2853 : tensor<1x32x40x64xf32>) -> tensor<1x32x40x64xf32> + %2855 = tensor.empty() : tensor<1x32x40x128xf32> + %inserted_slice_662 = tensor.insert_slice %2854 into %2855[0, 0, 0, 0] [1, 32, 40, 64] [1, 1, 1, 1] : tensor<1x32x40x64xf32> into tensor<1x32x40x128xf32> + %inserted_slice_663 = tensor.insert_slice %extracted_slice_660 into %inserted_slice_662[0, 0, 0, 64] [1, 32, 40, 64] [1, 1, 1, 1] : tensor<1x32x40x64xf32> into tensor<1x32x40x128xf32> + %2856 = tosa.mul %inserted_slice_663, %2845 
{shift = 0 : i8} : (tensor<1x32x40x128xf32>, tensor<1x1x40x128xf32>) -> tensor<1x32x40x128xf32> + %2857 = tosa.add %2852, %2856 : (tensor<1x32x40x128xf32>, tensor<1x32x40x128xf32>) -> tensor<1x32x40x128xf32> + %2858 = tosa.reshape %19 {new_shape = array} : (tensor<40x41xf32>) -> tensor<1x40x41xf32> + %2859 = tosa.reshape %2858 {new_shape = array} : (tensor<1x40x41xf32>) -> tensor<1x1x40x41xf32> + %extracted_slice_664 = tensor.extract_slice %2859[0, 0, 0, 0] [1, 1, 40, 41] [1, 1, 1, 1] : tensor<1x1x40x41xf32> to tensor<1x1x40x41xf32> + %extracted_slice_665 = tensor.extract_slice %extracted_slice_664[0, 0, 0, 0] [1, 1, 40, 41] [1, 1, 1, 1] : tensor<1x1x40x41xf32> to tensor<1x1x40x41xf32> + %2860 = "tosa.const"() <{value = dense<0.000000e+00> : tensor<1x1x40x41xf32>}> : () -> tensor<1x1x40x41xf32> + %2861 = tosa.add %extracted_slice_665, %2860 : (tensor<1x1x40x41xf32>, tensor<1x1x40x41xf32>) -> tensor<1x1x40x41xf32> + %extracted_slice_666 = tensor.extract_slice %2861[0, 0, 0, 0] [1, 1, 40, 41] [1, 1, 1, 1] : tensor<1x1x40x41xf32> to tensor<1x1x40x41xf32> + %extracted_slice_667 = tensor.extract_slice %extracted_slice_666[0, 0, 0, 0] [1, 1, 40, 41] [1, 1, 1, 1] : tensor<1x1x40x41xf32> to tensor<1x1x40x41xf32> + %extracted_slice_668 = tensor.extract_slice %extracted_slice_667[0, 0, 0, 0] [1, 1, 40, 41] [1, 1, 1, 1] : tensor<1x1x40x41xf32> to tensor<1x1x40x41xf32> + %extracted_slice_669 = tensor.extract_slice %extracted_slice_668[0, 0, 0, 0] [1, 1, 40, 40] [1, 1, 1, 1] : tensor<1x1x40x41xf32> to tensor<1x1x40x40xf32> + %cst_670 = arith.constant 0.000000e+00 : f32 + %splat_671 = tensor.splat %cst_670 : tensor<40x40xf32> + %2862 = tosa.reshape %extracted_slice_669 {new_shape = array} : (tensor<1x1x40x40xf32>) -> tensor<40x40xf32> + %2863 = tosa.add %splat_671, %2862 : (tensor<40x40xf32>, tensor<40x40xf32>) -> tensor<40x40xf32> + %2864 = "tosa.const"() <{value = dense<[0, 1, 3, 2]> : tensor<4xi32>}> : () -> tensor<4xi32> + %2865 = tosa.transpose %2857, %2864 : 
(tensor<1x32x40x128xf32>, tensor<4xi32>) -> tensor<1x32x128x40xf32> + %2866 = tosa.reshape %2851 {new_shape = array} : (tensor<1x32x40x128xf32>) -> tensor<32x40x128xf32> + %2867 = tosa.reshape %2865 {new_shape = array} : (tensor<1x32x128x40xf32>) -> tensor<32x128x40xf32> + %2868 = tosa.matmul %2866, %2867 : (tensor<32x40x128xf32>, tensor<32x128x40xf32>) -> tensor<32x40x40xf32> + %cst_672 = arith.constant 0.0883883461 : f32 + %splat_673 = tensor.splat %cst_672 : tensor<32x40x40xf32> + %2869 = tosa.mul %2868, %splat_673 {shift = 0 : i8} : (tensor<32x40x40xf32>, tensor<32x40x40xf32>) -> tensor<32x40x40xf32> + %2870 = tosa.add %2869, %2863 : (tensor<32x40x40xf32>, tensor<40x40xf32>) -> tensor<32x40x40xf32> + %2871 = tosa.reduce_max %2870 {axis = 2 : i32} : (tensor<32x40x40xf32>) -> tensor<32x40x1xf32> + %2872 = tosa.sub %2870, %2871 : (tensor<32x40x40xf32>, tensor<32x40x1xf32>) -> tensor<32x40x40xf32> + %2873 = math.exp %2872 : tensor<32x40x40xf32> + %2874 = tosa.reduce_sum %2873 {axis = 2 : i32} : (tensor<32x40x40xf32>) -> tensor<32x40x1xf32> + %2875 = tosa.log %2874 : (tensor<32x40x1xf32>) -> tensor<32x40x1xf32> + %2876 = tosa.add %2871, %2875 : (tensor<32x40x1xf32>, tensor<32x40x1xf32>) -> tensor<32x40x1xf32> + %2877 = tosa.sub %2870, %2876 : (tensor<32x40x40xf32>, tensor<32x40x1xf32>) -> tensor<32x40x40xf32> + %2878 = math.exp %2877 : tensor<32x40x40xf32> + %2879 = tosa.reshape %2876 {new_shape = array} : (tensor<32x40x1xf32>) -> tensor<1x32x40xf32> + %2880 = tosa.reshape %2843 {new_shape = array} : (tensor<1x32x40x128xf32>) -> tensor<32x40x128xf32> + %2881 = tosa.matmul %2878, %2880 : (tensor<32x40x40xf32>, tensor<32x40x128xf32>) -> tensor<32x40x128xf32> + %2882 = tosa.reshape %2881 {new_shape = array} : (tensor<32x40x128xf32>) -> tensor<1x32x40x128xf32> + %2883 = "tosa.const"() <{value = dense<[0, 2, 1, 3]> : tensor<4xi32>}> : () -> tensor<4xi32> + %2884 = tosa.transpose %2882, %2883 : (tensor<1x32x40x128xf32>, tensor<4xi32>) -> tensor<1x40x32x128xf32> + %2885 = 
tosa.reshape %2884 {new_shape = array} : (tensor<1x40x32x128xf32>) -> tensor<1x40x4096xf32> + %2886 = "tosa.const"() <{value = dense<[1, 0]> : tensor<2xi32>}> : () -> tensor<2xi32> + %2887 = tosa.transpose %arg223, %2886 : (tensor<4096x4096xf32>, tensor<2xi32>) -> tensor<4096x4096xf32> + %2888 = tosa.reshape %2885 {new_shape = array} : (tensor<1x40x4096xf32>) -> tensor<40x4096xf32> + %cst_674 = arith.constant dense<0.000000e+00> : tensor<40x4096xf32> + %2889 = linalg.matmul {cast = #linalg.type_fn} ins(%2888, %2887 : tensor<40x4096xf32>, tensor<4096x4096xf32>) outs(%cst_674 : tensor<40x4096xf32>) -> tensor<40x4096xf32> + %2890 = tosa.reshape %2889 {new_shape = array} : (tensor<40x4096xf32>) -> tensor<1x40x4096xf32> + %2891 = tosa.add %2807, %2890 : (tensor<1x40x4096xf32>, tensor<1x40x4096xf32>) -> tensor<1x40x4096xf32> + %2892 = tensor.empty() : tensor<1x40x4096xf32> + %c2_i32_675 = arith.constant 2 : i32 + %2893 = linalg.generic {indexing_maps = [#map4, #map4], iterator_types = ["parallel", "parallel", "parallel"]} ins(%2891 : tensor<1x40x4096xf32>) outs(%2892 : tensor<1x40x4096xf32>) { + ^bb0(%in: f32, %out: f32): + %3745 = math.fpowi %in, %c2_i32_675 : f32, i32 + linalg.yield %3745 : f32 + } -> tensor<1x40x4096xf32> + %2894 = tosa.reduce_sum %2893 {axis = 2 : i32} : (tensor<1x40x4096xf32>) -> tensor<1x40x1xf32> + %2895 = "tosa.const"() <{value = dense<4.096000e+03> : tensor<1xf32>}> : () -> tensor<1xf32> + %2896 = tosa.reciprocal %2895 : (tensor<1xf32>) -> tensor<1xf32> + %2897 = tosa.mul %2896, %2894 {shift = 0 : i8} : (tensor<1xf32>, tensor<1x40x1xf32>) -> tensor<1x40x1xf32> + %2898 = "tosa.const"() <{value = dense<9.99999974E-6> : tensor<1x40x1xf32>}> : () -> tensor<1x40x1xf32> + %2899 = tosa.add %2897, %2898 : (tensor<1x40x1xf32>, tensor<1x40x1xf32>) -> tensor<1x40x1xf32> + %2900 = tosa.rsqrt %2899 : (tensor<1x40x1xf32>) -> tensor<1x40x1xf32> + %2901 = tosa.mul %2891, %2900 {shift = 0 : i8} : (tensor<1x40x4096xf32>, tensor<1x40x1xf32>) -> 
tensor<1x40x4096xf32> + %2902 = tosa.reshape %arg224 {new_shape = array} : (tensor<4096xf32>) -> tensor<1x1x4096xf32> + %2903 = tosa.mul %2902, %2901 {shift = 0 : i8} : (tensor<1x1x4096xf32>, tensor<1x40x4096xf32>) -> tensor<1x40x4096xf32> + %2904 = "tosa.const"() <{value = dense<[1, 0]> : tensor<2xi32>}> : () -> tensor<2xi32> + %2905 = tosa.transpose %arg225, %2904 : (tensor<11008x4096xf32>, tensor<2xi32>) -> tensor<4096x11008xf32> + %2906 = tosa.reshape %2903 {new_shape = array} : (tensor<1x40x4096xf32>) -> tensor<40x4096xf32> + %cst_676 = arith.constant dense<0.000000e+00> : tensor<40x11008xf32> + %2907 = linalg.matmul {cast = #linalg.type_fn} ins(%2906, %2905 : tensor<40x4096xf32>, tensor<4096x11008xf32>) outs(%cst_676 : tensor<40x11008xf32>) -> tensor<40x11008xf32> + %2908 = tosa.reshape %2907 {new_shape = array} : (tensor<40x11008xf32>) -> tensor<1x40x11008xf32> + %2909 = tosa.sigmoid %2908 : (tensor<1x40x11008xf32>) -> tensor<1x40x11008xf32> + %2910 = tosa.mul %2908, %2909 {shift = 0 : i8} : (tensor<1x40x11008xf32>, tensor<1x40x11008xf32>) -> tensor<1x40x11008xf32> + %2911 = "tosa.const"() <{value = dense<[1, 0]> : tensor<2xi32>}> : () -> tensor<2xi32> + %2912 = tosa.transpose %arg226, %2911 : (tensor<11008x4096xf32>, tensor<2xi32>) -> tensor<4096x11008xf32> + %2913 = tosa.reshape %2903 {new_shape = array} : (tensor<1x40x4096xf32>) -> tensor<40x4096xf32> + %cst_677 = arith.constant dense<0.000000e+00> : tensor<40x11008xf32> + %2914 = linalg.matmul {cast = #linalg.type_fn} ins(%2913, %2912 : tensor<40x4096xf32>, tensor<4096x11008xf32>) outs(%cst_677 : tensor<40x11008xf32>) -> tensor<40x11008xf32> + %2915 = tosa.reshape %2914 {new_shape = array} : (tensor<40x11008xf32>) -> tensor<1x40x11008xf32> + %2916 = tosa.mul %2910, %2915 {shift = 0 : i8} : (tensor<1x40x11008xf32>, tensor<1x40x11008xf32>) -> tensor<1x40x11008xf32> + %2917 = "tosa.const"() <{value = dense<[1, 0]> : tensor<2xi32>}> : () -> tensor<2xi32> + %2918 = tosa.transpose %arg227, %2917 : 
(tensor<4096x11008xf32>, tensor<2xi32>) -> tensor<11008x4096xf32> + %2919 = tosa.reshape %2916 {new_shape = array} : (tensor<1x40x11008xf32>) -> tensor<40x11008xf32> + %cst_678 = arith.constant dense<0.000000e+00> : tensor<40x4096xf32> + %2920 = linalg.matmul {cast = #linalg.type_fn} ins(%2919, %2918 : tensor<40x11008xf32>, tensor<11008x4096xf32>) outs(%cst_678 : tensor<40x4096xf32>) -> tensor<40x4096xf32> + %2921 = tosa.reshape %2920 {new_shape = array} : (tensor<40x4096xf32>) -> tensor<1x40x4096xf32> + %2922 = tosa.add %2891, %2921 : (tensor<1x40x4096xf32>, tensor<1x40x4096xf32>) -> tensor<1x40x4096xf32> + %2923 = tensor.empty() : tensor<1x40x4096xf32> + %c2_i32_679 = arith.constant 2 : i32 + %2924 = linalg.generic {indexing_maps = [#map4, #map4], iterator_types = ["parallel", "parallel", "parallel"]} ins(%2922 : tensor<1x40x4096xf32>) outs(%2923 : tensor<1x40x4096xf32>) { + ^bb0(%in: f32, %out: f32): + %3745 = math.fpowi %in, %c2_i32_679 : f32, i32 + linalg.yield %3745 : f32 + } -> tensor<1x40x4096xf32> + %2925 = tosa.reduce_sum %2924 {axis = 2 : i32} : (tensor<1x40x4096xf32>) -> tensor<1x40x1xf32> + %2926 = "tosa.const"() <{value = dense<4.096000e+03> : tensor<1xf32>}> : () -> tensor<1xf32> + %2927 = tosa.reciprocal %2926 : (tensor<1xf32>) -> tensor<1xf32> + %2928 = tosa.mul %2927, %2925 {shift = 0 : i8} : (tensor<1xf32>, tensor<1x40x1xf32>) -> tensor<1x40x1xf32> + %2929 = "tosa.const"() <{value = dense<9.99999974E-6> : tensor<1x40x1xf32>}> : () -> tensor<1x40x1xf32> + %2930 = tosa.add %2928, %2929 : (tensor<1x40x1xf32>, tensor<1x40x1xf32>) -> tensor<1x40x1xf32> + %2931 = tosa.rsqrt %2930 : (tensor<1x40x1xf32>) -> tensor<1x40x1xf32> + %2932 = tosa.mul %2922, %2931 {shift = 0 : i8} : (tensor<1x40x4096xf32>, tensor<1x40x1xf32>) -> tensor<1x40x4096xf32> + %2933 = tosa.reshape %arg228 {new_shape = array} : (tensor<4096xf32>) -> tensor<1x1x4096xf32> + %2934 = tosa.mul %2933, %2932 {shift = 0 : i8} : (tensor<1x1x4096xf32>, tensor<1x40x4096xf32>) -> 
tensor<1x40x4096xf32> + %2935 = "tosa.const"() <{value = dense<[1, 0]> : tensor<2xi32>}> : () -> tensor<2xi32> + %2936 = tosa.transpose %arg229, %2935 : (tensor<4096x4096xf32>, tensor<2xi32>) -> tensor<4096x4096xf32> + %2937 = tosa.reshape %2934 {new_shape = array} : (tensor<1x40x4096xf32>) -> tensor<40x4096xf32> + %cst_680 = arith.constant dense<0.000000e+00> : tensor<40x4096xf32> + %2938 = linalg.matmul {cast = #linalg.type_fn} ins(%2937, %2936 : tensor<40x4096xf32>, tensor<4096x4096xf32>) outs(%cst_680 : tensor<40x4096xf32>) -> tensor<40x4096xf32> + %2939 = tosa.reshape %2938 {new_shape = array} : (tensor<40x4096xf32>) -> tensor<1x40x4096xf32> + %2940 = "tosa.const"() <{value = dense<[1, 0]> : tensor<2xi32>}> : () -> tensor<2xi32> + %2941 = tosa.transpose %arg230, %2940 : (tensor<4096x4096xf32>, tensor<2xi32>) -> tensor<4096x4096xf32> + %2942 = tosa.reshape %2934 {new_shape = array} : (tensor<1x40x4096xf32>) -> tensor<40x4096xf32> + %cst_681 = arith.constant dense<0.000000e+00> : tensor<40x4096xf32> + %2943 = linalg.matmul {cast = #linalg.type_fn} ins(%2942, %2941 : tensor<40x4096xf32>, tensor<4096x4096xf32>) outs(%cst_681 : tensor<40x4096xf32>) -> tensor<40x4096xf32> + %2944 = tosa.reshape %2943 {new_shape = array} : (tensor<40x4096xf32>) -> tensor<1x40x4096xf32> + %2945 = "tosa.const"() <{value = dense<[1, 0]> : tensor<2xi32>}> : () -> tensor<2xi32> + %2946 = tosa.transpose %arg231, %2945 : (tensor<4096x4096xf32>, tensor<2xi32>) -> tensor<4096x4096xf32> + %2947 = tosa.reshape %2934 {new_shape = array} : (tensor<1x40x4096xf32>) -> tensor<40x4096xf32> + %cst_682 = arith.constant dense<0.000000e+00> : tensor<40x4096xf32> + %2948 = linalg.matmul {cast = #linalg.type_fn} ins(%2947, %2946 : tensor<40x4096xf32>, tensor<4096x4096xf32>) outs(%cst_682 : tensor<40x4096xf32>) -> tensor<40x4096xf32> + %2949 = tosa.reshape %2948 {new_shape = array} : (tensor<40x4096xf32>) -> tensor<1x40x4096xf32> + %2950 = tosa.reshape %2939 {new_shape = array} : (tensor<1x40x4096xf32>) -> 
tensor<1x40x32x128xf32> + %2951 = "tosa.const"() <{value = dense<[0, 2, 1, 3]> : tensor<4xi32>}> : () -> tensor<4xi32> + %2952 = tosa.transpose %2950, %2951 : (tensor<1x40x32x128xf32>, tensor<4xi32>) -> tensor<1x32x40x128xf32> + %2953 = tosa.reshape %2944 {new_shape = array} : (tensor<1x40x4096xf32>) -> tensor<1x40x32x128xf32> + %2954 = "tosa.const"() <{value = dense<[0, 2, 1, 3]> : tensor<4xi32>}> : () -> tensor<4xi32> + %2955 = tosa.transpose %2953, %2954 : (tensor<1x40x32x128xf32>, tensor<4xi32>) -> tensor<1x32x40x128xf32> + %2956 = tosa.reshape %2949 {new_shape = array} : (tensor<1x40x4096xf32>) -> tensor<1x40x32x128xf32> + %2957 = "tosa.const"() <{value = dense<[0, 2, 1, 3]> : tensor<4xi32>}> : () -> tensor<4xi32> + %2958 = tosa.transpose %2956, %2957 : (tensor<1x40x32x128xf32>, tensor<4xi32>) -> tensor<1x32x40x128xf32> + %2959 = tosa.reshape %45 {new_shape = array} : (tensor<1x40x128xf32>) -> tensor<1x1x40x128xf32> + %2960 = tosa.reshape %47 {new_shape = array} : (tensor<1x40x128xf32>) -> tensor<1x1x40x128xf32> + %2961 = tosa.mul %2952, %2959 {shift = 0 : i8} : (tensor<1x32x40x128xf32>, tensor<1x1x40x128xf32>) -> tensor<1x32x40x128xf32> + %extracted_slice_683 = tensor.extract_slice %2952[0, 0, 0, 0] [1, 32, 40, 64] [1, 1, 1, 1] : tensor<1x32x40x128xf32> to tensor<1x32x40x64xf32> + %extracted_slice_684 = tensor.extract_slice %2952[0, 0, 0, 64] [1, 32, 40, 64] [1, 1, 1, 1] : tensor<1x32x40x128xf32> to tensor<1x32x40x64xf32> + %2962 = tensor.empty() : tensor<1x32x40x64xf32> + %2963 = linalg.negf ins(%extracted_slice_684 : tensor<1x32x40x64xf32>) outs(%2962 : tensor<1x32x40x64xf32>) -> tensor<1x32x40x64xf32> + %2964 = tensor.empty() : tensor<1x32x40x128xf32> + %inserted_slice_685 = tensor.insert_slice %2963 into %2964[0, 0, 0, 0] [1, 32, 40, 64] [1, 1, 1, 1] : tensor<1x32x40x64xf32> into tensor<1x32x40x128xf32> + %inserted_slice_686 = tensor.insert_slice %extracted_slice_683 into %inserted_slice_685[0, 0, 0, 64] [1, 32, 40, 64] [1, 1, 1, 1] : 
tensor<1x32x40x64xf32> into tensor<1x32x40x128xf32> + %2965 = tosa.mul %inserted_slice_686, %2960 {shift = 0 : i8} : (tensor<1x32x40x128xf32>, tensor<1x1x40x128xf32>) -> tensor<1x32x40x128xf32> + %2966 = tosa.add %2961, %2965 : (tensor<1x32x40x128xf32>, tensor<1x32x40x128xf32>) -> tensor<1x32x40x128xf32> + %2967 = tosa.mul %2955, %2959 {shift = 0 : i8} : (tensor<1x32x40x128xf32>, tensor<1x1x40x128xf32>) -> tensor<1x32x40x128xf32> + %extracted_slice_687 = tensor.extract_slice %2955[0, 0, 0, 0] [1, 32, 40, 64] [1, 1, 1, 1] : tensor<1x32x40x128xf32> to tensor<1x32x40x64xf32> + %extracted_slice_688 = tensor.extract_slice %2955[0, 0, 0, 64] [1, 32, 40, 64] [1, 1, 1, 1] : tensor<1x32x40x128xf32> to tensor<1x32x40x64xf32> + %2968 = tensor.empty() : tensor<1x32x40x64xf32> + %2969 = linalg.negf ins(%extracted_slice_688 : tensor<1x32x40x64xf32>) outs(%2968 : tensor<1x32x40x64xf32>) -> tensor<1x32x40x64xf32> + %2970 = tensor.empty() : tensor<1x32x40x128xf32> + %inserted_slice_689 = tensor.insert_slice %2969 into %2970[0, 0, 0, 0] [1, 32, 40, 64] [1, 1, 1, 1] : tensor<1x32x40x64xf32> into tensor<1x32x40x128xf32> + %inserted_slice_690 = tensor.insert_slice %extracted_slice_687 into %inserted_slice_689[0, 0, 0, 64] [1, 32, 40, 64] [1, 1, 1, 1] : tensor<1x32x40x64xf32> into tensor<1x32x40x128xf32> + %2971 = tosa.mul %inserted_slice_690, %2960 {shift = 0 : i8} : (tensor<1x32x40x128xf32>, tensor<1x1x40x128xf32>) -> tensor<1x32x40x128xf32> + %2972 = tosa.add %2967, %2971 : (tensor<1x32x40x128xf32>, tensor<1x32x40x128xf32>) -> tensor<1x32x40x128xf32> + %2973 = tosa.reshape %19 {new_shape = array} : (tensor<40x41xf32>) -> tensor<1x40x41xf32> + %2974 = tosa.reshape %2973 {new_shape = array} : (tensor<1x40x41xf32>) -> tensor<1x1x40x41xf32> + %extracted_slice_691 = tensor.extract_slice %2974[0, 0, 0, 0] [1, 1, 40, 41] [1, 1, 1, 1] : tensor<1x1x40x41xf32> to tensor<1x1x40x41xf32> + %extracted_slice_692 = tensor.extract_slice %extracted_slice_691[0, 0, 0, 0] [1, 1, 40, 41] [1, 1, 1, 1] : 
tensor<1x1x40x41xf32> to tensor<1x1x40x41xf32> + %2975 = "tosa.const"() <{value = dense<0.000000e+00> : tensor<1x1x40x41xf32>}> : () -> tensor<1x1x40x41xf32> + %2976 = tosa.add %extracted_slice_692, %2975 : (tensor<1x1x40x41xf32>, tensor<1x1x40x41xf32>) -> tensor<1x1x40x41xf32> + %extracted_slice_693 = tensor.extract_slice %2976[0, 0, 0, 0] [1, 1, 40, 41] [1, 1, 1, 1] : tensor<1x1x40x41xf32> to tensor<1x1x40x41xf32> + %extracted_slice_694 = tensor.extract_slice %extracted_slice_693[0, 0, 0, 0] [1, 1, 40, 41] [1, 1, 1, 1] : tensor<1x1x40x41xf32> to tensor<1x1x40x41xf32> + %extracted_slice_695 = tensor.extract_slice %extracted_slice_694[0, 0, 0, 0] [1, 1, 40, 41] [1, 1, 1, 1] : tensor<1x1x40x41xf32> to tensor<1x1x40x41xf32> + %extracted_slice_696 = tensor.extract_slice %extracted_slice_695[0, 0, 0, 0] [1, 1, 40, 40] [1, 1, 1, 1] : tensor<1x1x40x41xf32> to tensor<1x1x40x40xf32> + %cst_697 = arith.constant 0.000000e+00 : f32 + %splat_698 = tensor.splat %cst_697 : tensor<40x40xf32> + %2977 = tosa.reshape %extracted_slice_696 {new_shape = array} : (tensor<1x1x40x40xf32>) -> tensor<40x40xf32> + %2978 = tosa.add %splat_698, %2977 : (tensor<40x40xf32>, tensor<40x40xf32>) -> tensor<40x40xf32> + %2979 = "tosa.const"() <{value = dense<[0, 1, 3, 2]> : tensor<4xi32>}> : () -> tensor<4xi32> + %2980 = tosa.transpose %2972, %2979 : (tensor<1x32x40x128xf32>, tensor<4xi32>) -> tensor<1x32x128x40xf32> + %2981 = tosa.reshape %2966 {new_shape = array} : (tensor<1x32x40x128xf32>) -> tensor<32x40x128xf32> + %2982 = tosa.reshape %2980 {new_shape = array} : (tensor<1x32x128x40xf32>) -> tensor<32x128x40xf32> + %2983 = tosa.matmul %2981, %2982 : (tensor<32x40x128xf32>, tensor<32x128x40xf32>) -> tensor<32x40x40xf32> + %cst_699 = arith.constant 0.0883883461 : f32 + %splat_700 = tensor.splat %cst_699 : tensor<32x40x40xf32> + %2984 = tosa.mul %2983, %splat_700 {shift = 0 : i8} : (tensor<32x40x40xf32>, tensor<32x40x40xf32>) -> tensor<32x40x40xf32> + %2985 = tosa.add %2984, %2978 : 
(tensor<32x40x40xf32>, tensor<40x40xf32>) -> tensor<32x40x40xf32> + %2986 = tosa.reduce_max %2985 {axis = 2 : i32} : (tensor<32x40x40xf32>) -> tensor<32x40x1xf32> + %2987 = tosa.sub %2985, %2986 : (tensor<32x40x40xf32>, tensor<32x40x1xf32>) -> tensor<32x40x40xf32> + %2988 = math.exp %2987 : tensor<32x40x40xf32> + %2989 = tosa.reduce_sum %2988 {axis = 2 : i32} : (tensor<32x40x40xf32>) -> tensor<32x40x1xf32> + %2990 = tosa.log %2989 : (tensor<32x40x1xf32>) -> tensor<32x40x1xf32> + %2991 = tosa.add %2986, %2990 : (tensor<32x40x1xf32>, tensor<32x40x1xf32>) -> tensor<32x40x1xf32> + %2992 = tosa.sub %2985, %2991 : (tensor<32x40x40xf32>, tensor<32x40x1xf32>) -> tensor<32x40x40xf32> + %2993 = math.exp %2992 : tensor<32x40x40xf32> + %2994 = tosa.reshape %2991 {new_shape = array} : (tensor<32x40x1xf32>) -> tensor<1x32x40xf32> + %2995 = tosa.reshape %2958 {new_shape = array} : (tensor<1x32x40x128xf32>) -> tensor<32x40x128xf32> + %2996 = tosa.matmul %2993, %2995 : (tensor<32x40x40xf32>, tensor<32x40x128xf32>) -> tensor<32x40x128xf32> + %2997 = tosa.reshape %2996 {new_shape = array} : (tensor<32x40x128xf32>) -> tensor<1x32x40x128xf32> + %2998 = "tosa.const"() <{value = dense<[0, 2, 1, 3]> : tensor<4xi32>}> : () -> tensor<4xi32> + %2999 = tosa.transpose %2997, %2998 : (tensor<1x32x40x128xf32>, tensor<4xi32>) -> tensor<1x40x32x128xf32> + %3000 = tosa.reshape %2999 {new_shape = array} : (tensor<1x40x32x128xf32>) -> tensor<1x40x4096xf32> + %3001 = "tosa.const"() <{value = dense<[1, 0]> : tensor<2xi32>}> : () -> tensor<2xi32> + %3002 = tosa.transpose %arg232, %3001 : (tensor<4096x4096xf32>, tensor<2xi32>) -> tensor<4096x4096xf32> + %3003 = tosa.reshape %3000 {new_shape = array} : (tensor<1x40x4096xf32>) -> tensor<40x4096xf32> + %cst_701 = arith.constant dense<0.000000e+00> : tensor<40x4096xf32> + %3004 = linalg.matmul {cast = #linalg.type_fn} ins(%3003, %3002 : tensor<40x4096xf32>, tensor<4096x4096xf32>) outs(%cst_701 : tensor<40x4096xf32>) -> tensor<40x4096xf32> + %3005 = 
tosa.reshape %3004 {new_shape = array} : (tensor<40x4096xf32>) -> tensor<1x40x4096xf32> + %3006 = tosa.add %2922, %3005 : (tensor<1x40x4096xf32>, tensor<1x40x4096xf32>) -> tensor<1x40x4096xf32> + %3007 = tensor.empty() : tensor<1x40x4096xf32> + %c2_i32_702 = arith.constant 2 : i32 + %3008 = linalg.generic {indexing_maps = [#map4, #map4], iterator_types = ["parallel", "parallel", "parallel"]} ins(%3006 : tensor<1x40x4096xf32>) outs(%3007 : tensor<1x40x4096xf32>) { + ^bb0(%in: f32, %out: f32): + %3745 = math.fpowi %in, %c2_i32_702 : f32, i32 + linalg.yield %3745 : f32 + } -> tensor<1x40x4096xf32> + %3009 = tosa.reduce_sum %3008 {axis = 2 : i32} : (tensor<1x40x4096xf32>) -> tensor<1x40x1xf32> + %3010 = "tosa.const"() <{value = dense<4.096000e+03> : tensor<1xf32>}> : () -> tensor<1xf32> + %3011 = tosa.reciprocal %3010 : (tensor<1xf32>) -> tensor<1xf32> + %3012 = tosa.mul %3011, %3009 {shift = 0 : i8} : (tensor<1xf32>, tensor<1x40x1xf32>) -> tensor<1x40x1xf32> + %3013 = "tosa.const"() <{value = dense<9.99999974E-6> : tensor<1x40x1xf32>}> : () -> tensor<1x40x1xf32> + %3014 = tosa.add %3012, %3013 : (tensor<1x40x1xf32>, tensor<1x40x1xf32>) -> tensor<1x40x1xf32> + %3015 = tosa.rsqrt %3014 : (tensor<1x40x1xf32>) -> tensor<1x40x1xf32> + %3016 = tosa.mul %3006, %3015 {shift = 0 : i8} : (tensor<1x40x4096xf32>, tensor<1x40x1xf32>) -> tensor<1x40x4096xf32> + %3017 = tosa.reshape %arg233 {new_shape = array} : (tensor<4096xf32>) -> tensor<1x1x4096xf32> + %3018 = tosa.mul %3017, %3016 {shift = 0 : i8} : (tensor<1x1x4096xf32>, tensor<1x40x4096xf32>) -> tensor<1x40x4096xf32> + %3019 = "tosa.const"() <{value = dense<[1, 0]> : tensor<2xi32>}> : () -> tensor<2xi32> + %3020 = tosa.transpose %arg234, %3019 : (tensor<11008x4096xf32>, tensor<2xi32>) -> tensor<4096x11008xf32> + %3021 = tosa.reshape %3018 {new_shape = array} : (tensor<1x40x4096xf32>) -> tensor<40x4096xf32> + %cst_703 = arith.constant dense<0.000000e+00> : tensor<40x11008xf32> + %3022 = linalg.matmul {cast = #linalg.type_fn} 
ins(%3021, %3020 : tensor<40x4096xf32>, tensor<4096x11008xf32>) outs(%cst_703 : tensor<40x11008xf32>) -> tensor<40x11008xf32> + %3023 = tosa.reshape %3022 {new_shape = array} : (tensor<40x11008xf32>) -> tensor<1x40x11008xf32> + %3024 = tosa.sigmoid %3023 : (tensor<1x40x11008xf32>) -> tensor<1x40x11008xf32> + %3025 = tosa.mul %3023, %3024 {shift = 0 : i8} : (tensor<1x40x11008xf32>, tensor<1x40x11008xf32>) -> tensor<1x40x11008xf32> + %3026 = "tosa.const"() <{value = dense<[1, 0]> : tensor<2xi32>}> : () -> tensor<2xi32> + %3027 = tosa.transpose %arg235, %3026 : (tensor<11008x4096xf32>, tensor<2xi32>) -> tensor<4096x11008xf32> + %3028 = tosa.reshape %3018 {new_shape = array} : (tensor<1x40x4096xf32>) -> tensor<40x4096xf32> + %cst_704 = arith.constant dense<0.000000e+00> : tensor<40x11008xf32> + %3029 = linalg.matmul {cast = #linalg.type_fn} ins(%3028, %3027 : tensor<40x4096xf32>, tensor<4096x11008xf32>) outs(%cst_704 : tensor<40x11008xf32>) -> tensor<40x11008xf32> + %3030 = tosa.reshape %3029 {new_shape = array} : (tensor<40x11008xf32>) -> tensor<1x40x11008xf32> + %3031 = tosa.mul %3025, %3030 {shift = 0 : i8} : (tensor<1x40x11008xf32>, tensor<1x40x11008xf32>) -> tensor<1x40x11008xf32> + %3032 = "tosa.const"() <{value = dense<[1, 0]> : tensor<2xi32>}> : () -> tensor<2xi32> + %3033 = tosa.transpose %arg236, %3032 : (tensor<4096x11008xf32>, tensor<2xi32>) -> tensor<11008x4096xf32> + %3034 = tosa.reshape %3031 {new_shape = array} : (tensor<1x40x11008xf32>) -> tensor<40x11008xf32> + %cst_705 = arith.constant dense<0.000000e+00> : tensor<40x4096xf32> + %3035 = linalg.matmul {cast = #linalg.type_fn} ins(%3034, %3033 : tensor<40x11008xf32>, tensor<11008x4096xf32>) outs(%cst_705 : tensor<40x4096xf32>) -> tensor<40x4096xf32> + %3036 = tosa.reshape %3035 {new_shape = array} : (tensor<40x4096xf32>) -> tensor<1x40x4096xf32> + %3037 = tosa.add %3006, %3036 : (tensor<1x40x4096xf32>, tensor<1x40x4096xf32>) -> tensor<1x40x4096xf32> + %3038 = tensor.empty() : tensor<1x40x4096xf32> + 
%c2_i32_706 = arith.constant 2 : i32 + %3039 = linalg.generic {indexing_maps = [#map4, #map4], iterator_types = ["parallel", "parallel", "parallel"]} ins(%3037 : tensor<1x40x4096xf32>) outs(%3038 : tensor<1x40x4096xf32>) { + ^bb0(%in: f32, %out: f32): + %3745 = math.fpowi %in, %c2_i32_706 : f32, i32 + linalg.yield %3745 : f32 + } -> tensor<1x40x4096xf32> + %3040 = tosa.reduce_sum %3039 {axis = 2 : i32} : (tensor<1x40x4096xf32>) -> tensor<1x40x1xf32> + %3041 = "tosa.const"() <{value = dense<4.096000e+03> : tensor<1xf32>}> : () -> tensor<1xf32> + %3042 = tosa.reciprocal %3041 : (tensor<1xf32>) -> tensor<1xf32> + %3043 = tosa.mul %3042, %3040 {shift = 0 : i8} : (tensor<1xf32>, tensor<1x40x1xf32>) -> tensor<1x40x1xf32> + %3044 = "tosa.const"() <{value = dense<9.99999974E-6> : tensor<1x40x1xf32>}> : () -> tensor<1x40x1xf32> + %3045 = tosa.add %3043, %3044 : (tensor<1x40x1xf32>, tensor<1x40x1xf32>) -> tensor<1x40x1xf32> + %3046 = tosa.rsqrt %3045 : (tensor<1x40x1xf32>) -> tensor<1x40x1xf32> + %3047 = tosa.mul %3037, %3046 {shift = 0 : i8} : (tensor<1x40x4096xf32>, tensor<1x40x1xf32>) -> tensor<1x40x4096xf32> + %3048 = tosa.reshape %arg237 {new_shape = array} : (tensor<4096xf32>) -> tensor<1x1x4096xf32> + %3049 = tosa.mul %3048, %3047 {shift = 0 : i8} : (tensor<1x1x4096xf32>, tensor<1x40x4096xf32>) -> tensor<1x40x4096xf32> + %3050 = "tosa.const"() <{value = dense<[1, 0]> : tensor<2xi32>}> : () -> tensor<2xi32> + %3051 = tosa.transpose %arg238, %3050 : (tensor<4096x4096xf32>, tensor<2xi32>) -> tensor<4096x4096xf32> + %3052 = tosa.reshape %3049 {new_shape = array} : (tensor<1x40x4096xf32>) -> tensor<40x4096xf32> + %cst_707 = arith.constant dense<0.000000e+00> : tensor<40x4096xf32> + %3053 = linalg.matmul {cast = #linalg.type_fn} ins(%3052, %3051 : tensor<40x4096xf32>, tensor<4096x4096xf32>) outs(%cst_707 : tensor<40x4096xf32>) -> tensor<40x4096xf32> + %3054 = tosa.reshape %3053 {new_shape = array} : (tensor<40x4096xf32>) -> tensor<1x40x4096xf32> + %3055 = "tosa.const"() 
<{value = dense<[1, 0]> : tensor<2xi32>}> : () -> tensor<2xi32> + %3056 = tosa.transpose %arg239, %3055 : (tensor<4096x4096xf32>, tensor<2xi32>) -> tensor<4096x4096xf32> + %3057 = tosa.reshape %3049 {new_shape = array} : (tensor<1x40x4096xf32>) -> tensor<40x4096xf32> + %cst_708 = arith.constant dense<0.000000e+00> : tensor<40x4096xf32> + %3058 = linalg.matmul {cast = #linalg.type_fn} ins(%3057, %3056 : tensor<40x4096xf32>, tensor<4096x4096xf32>) outs(%cst_708 : tensor<40x4096xf32>) -> tensor<40x4096xf32> + %3059 = tosa.reshape %3058 {new_shape = array} : (tensor<40x4096xf32>) -> tensor<1x40x4096xf32> + %3060 = "tosa.const"() <{value = dense<[1, 0]> : tensor<2xi32>}> : () -> tensor<2xi32> + %3061 = tosa.transpose %arg240, %3060 : (tensor<4096x4096xf32>, tensor<2xi32>) -> tensor<4096x4096xf32> + %3062 = tosa.reshape %3049 {new_shape = array} : (tensor<1x40x4096xf32>) -> tensor<40x4096xf32> + %cst_709 = arith.constant dense<0.000000e+00> : tensor<40x4096xf32> + %3063 = linalg.matmul {cast = #linalg.type_fn} ins(%3062, %3061 : tensor<40x4096xf32>, tensor<4096x4096xf32>) outs(%cst_709 : tensor<40x4096xf32>) -> tensor<40x4096xf32> + %3064 = tosa.reshape %3063 {new_shape = array} : (tensor<40x4096xf32>) -> tensor<1x40x4096xf32> + %3065 = tosa.reshape %3054 {new_shape = array} : (tensor<1x40x4096xf32>) -> tensor<1x40x32x128xf32> + %3066 = "tosa.const"() <{value = dense<[0, 2, 1, 3]> : tensor<4xi32>}> : () -> tensor<4xi32> + %3067 = tosa.transpose %3065, %3066 : (tensor<1x40x32x128xf32>, tensor<4xi32>) -> tensor<1x32x40x128xf32> + %3068 = tosa.reshape %3059 {new_shape = array} : (tensor<1x40x4096xf32>) -> tensor<1x40x32x128xf32> + %3069 = "tosa.const"() <{value = dense<[0, 2, 1, 3]> : tensor<4xi32>}> : () -> tensor<4xi32> + %3070 = tosa.transpose %3068, %3069 : (tensor<1x40x32x128xf32>, tensor<4xi32>) -> tensor<1x32x40x128xf32> + %3071 = tosa.reshape %3064 {new_shape = array} : (tensor<1x40x4096xf32>) -> tensor<1x40x32x128xf32> + %3072 = "tosa.const"() <{value = dense<[0, 
2, 1, 3]> : tensor<4xi32>}> : () -> tensor<4xi32> + %3073 = tosa.transpose %3071, %3072 : (tensor<1x40x32x128xf32>, tensor<4xi32>) -> tensor<1x32x40x128xf32> + %3074 = tosa.reshape %45 {new_shape = array} : (tensor<1x40x128xf32>) -> tensor<1x1x40x128xf32> + %3075 = tosa.reshape %47 {new_shape = array} : (tensor<1x40x128xf32>) -> tensor<1x1x40x128xf32> + %3076 = tosa.mul %3067, %3074 {shift = 0 : i8} : (tensor<1x32x40x128xf32>, tensor<1x1x40x128xf32>) -> tensor<1x32x40x128xf32> + %extracted_slice_710 = tensor.extract_slice %3067[0, 0, 0, 0] [1, 32, 40, 64] [1, 1, 1, 1] : tensor<1x32x40x128xf32> to tensor<1x32x40x64xf32> + %extracted_slice_711 = tensor.extract_slice %3067[0, 0, 0, 64] [1, 32, 40, 64] [1, 1, 1, 1] : tensor<1x32x40x128xf32> to tensor<1x32x40x64xf32> + %3077 = tensor.empty() : tensor<1x32x40x64xf32> + %3078 = linalg.negf ins(%extracted_slice_711 : tensor<1x32x40x64xf32>) outs(%3077 : tensor<1x32x40x64xf32>) -> tensor<1x32x40x64xf32> + %3079 = tensor.empty() : tensor<1x32x40x128xf32> + %inserted_slice_712 = tensor.insert_slice %3078 into %3079[0, 0, 0, 0] [1, 32, 40, 64] [1, 1, 1, 1] : tensor<1x32x40x64xf32> into tensor<1x32x40x128xf32> + %inserted_slice_713 = tensor.insert_slice %extracted_slice_710 into %inserted_slice_712[0, 0, 0, 64] [1, 32, 40, 64] [1, 1, 1, 1] : tensor<1x32x40x64xf32> into tensor<1x32x40x128xf32> + %3080 = tosa.mul %inserted_slice_713, %3075 {shift = 0 : i8} : (tensor<1x32x40x128xf32>, tensor<1x1x40x128xf32>) -> tensor<1x32x40x128xf32> + %3081 = tosa.add %3076, %3080 : (tensor<1x32x40x128xf32>, tensor<1x32x40x128xf32>) -> tensor<1x32x40x128xf32> + %3082 = tosa.mul %3070, %3074 {shift = 0 : i8} : (tensor<1x32x40x128xf32>, tensor<1x1x40x128xf32>) -> tensor<1x32x40x128xf32> + %extracted_slice_714 = tensor.extract_slice %3070[0, 0, 0, 0] [1, 32, 40, 64] [1, 1, 1, 1] : tensor<1x32x40x128xf32> to tensor<1x32x40x64xf32> + %extracted_slice_715 = tensor.extract_slice %3070[0, 0, 0, 64] [1, 32, 40, 64] [1, 1, 1, 1] : tensor<1x32x40x128xf32> 
to tensor<1x32x40x64xf32> + %3083 = tensor.empty() : tensor<1x32x40x64xf32> + %3084 = linalg.negf ins(%extracted_slice_715 : tensor<1x32x40x64xf32>) outs(%3083 : tensor<1x32x40x64xf32>) -> tensor<1x32x40x64xf32> + %3085 = tensor.empty() : tensor<1x32x40x128xf32> + %inserted_slice_716 = tensor.insert_slice %3084 into %3085[0, 0, 0, 0] [1, 32, 40, 64] [1, 1, 1, 1] : tensor<1x32x40x64xf32> into tensor<1x32x40x128xf32> + %inserted_slice_717 = tensor.insert_slice %extracted_slice_714 into %inserted_slice_716[0, 0, 0, 64] [1, 32, 40, 64] [1, 1, 1, 1] : tensor<1x32x40x64xf32> into tensor<1x32x40x128xf32> + %3086 = tosa.mul %inserted_slice_717, %3075 {shift = 0 : i8} : (tensor<1x32x40x128xf32>, tensor<1x1x40x128xf32>) -> tensor<1x32x40x128xf32> + %3087 = tosa.add %3082, %3086 : (tensor<1x32x40x128xf32>, tensor<1x32x40x128xf32>) -> tensor<1x32x40x128xf32> + %3088 = tosa.reshape %19 {new_shape = array} : (tensor<40x41xf32>) -> tensor<1x40x41xf32> + %3089 = tosa.reshape %3088 {new_shape = array} : (tensor<1x40x41xf32>) -> tensor<1x1x40x41xf32> + %extracted_slice_718 = tensor.extract_slice %3089[0, 0, 0, 0] [1, 1, 40, 41] [1, 1, 1, 1] : tensor<1x1x40x41xf32> to tensor<1x1x40x41xf32> + %extracted_slice_719 = tensor.extract_slice %extracted_slice_718[0, 0, 0, 0] [1, 1, 40, 41] [1, 1, 1, 1] : tensor<1x1x40x41xf32> to tensor<1x1x40x41xf32> + %3090 = "tosa.const"() <{value = dense<0.000000e+00> : tensor<1x1x40x41xf32>}> : () -> tensor<1x1x40x41xf32> + %3091 = tosa.add %extracted_slice_719, %3090 : (tensor<1x1x40x41xf32>, tensor<1x1x40x41xf32>) -> tensor<1x1x40x41xf32> + %extracted_slice_720 = tensor.extract_slice %3091[0, 0, 0, 0] [1, 1, 40, 41] [1, 1, 1, 1] : tensor<1x1x40x41xf32> to tensor<1x1x40x41xf32> + %extracted_slice_721 = tensor.extract_slice %extracted_slice_720[0, 0, 0, 0] [1, 1, 40, 41] [1, 1, 1, 1] : tensor<1x1x40x41xf32> to tensor<1x1x40x41xf32> + %extracted_slice_722 = tensor.extract_slice %extracted_slice_721[0, 0, 0, 0] [1, 1, 40, 41] [1, 1, 1, 1] : 
tensor<1x1x40x41xf32> to tensor<1x1x40x41xf32> + %extracted_slice_723 = tensor.extract_slice %extracted_slice_722[0, 0, 0, 0] [1, 1, 40, 40] [1, 1, 1, 1] : tensor<1x1x40x41xf32> to tensor<1x1x40x40xf32> + %cst_724 = arith.constant 0.000000e+00 : f32 + %splat_725 = tensor.splat %cst_724 : tensor<40x40xf32> + %3092 = tosa.reshape %extracted_slice_723 {new_shape = array} : (tensor<1x1x40x40xf32>) -> tensor<40x40xf32> + %3093 = tosa.add %splat_725, %3092 : (tensor<40x40xf32>, tensor<40x40xf32>) -> tensor<40x40xf32> + %3094 = "tosa.const"() <{value = dense<[0, 1, 3, 2]> : tensor<4xi32>}> : () -> tensor<4xi32> + %3095 = tosa.transpose %3087, %3094 : (tensor<1x32x40x128xf32>, tensor<4xi32>) -> tensor<1x32x128x40xf32> + %3096 = tosa.reshape %3081 {new_shape = array} : (tensor<1x32x40x128xf32>) -> tensor<32x40x128xf32> + %3097 = tosa.reshape %3095 {new_shape = array} : (tensor<1x32x128x40xf32>) -> tensor<32x128x40xf32> + %3098 = tosa.matmul %3096, %3097 : (tensor<32x40x128xf32>, tensor<32x128x40xf32>) -> tensor<32x40x40xf32> + %cst_726 = arith.constant 0.0883883461 : f32 + %splat_727 = tensor.splat %cst_726 : tensor<32x40x40xf32> + %3099 = tosa.mul %3098, %splat_727 {shift = 0 : i8} : (tensor<32x40x40xf32>, tensor<32x40x40xf32>) -> tensor<32x40x40xf32> + %3100 = tosa.add %3099, %3093 : (tensor<32x40x40xf32>, tensor<40x40xf32>) -> tensor<32x40x40xf32> + %3101 = tosa.reduce_max %3100 {axis = 2 : i32} : (tensor<32x40x40xf32>) -> tensor<32x40x1xf32> + %3102 = tosa.sub %3100, %3101 : (tensor<32x40x40xf32>, tensor<32x40x1xf32>) -> tensor<32x40x40xf32> + %3103 = math.exp %3102 : tensor<32x40x40xf32> + %3104 = tosa.reduce_sum %3103 {axis = 2 : i32} : (tensor<32x40x40xf32>) -> tensor<32x40x1xf32> + %3105 = tosa.log %3104 : (tensor<32x40x1xf32>) -> tensor<32x40x1xf32> + %3106 = tosa.add %3101, %3105 : (tensor<32x40x1xf32>, tensor<32x40x1xf32>) -> tensor<32x40x1xf32> + %3107 = tosa.sub %3100, %3106 : (tensor<32x40x40xf32>, tensor<32x40x1xf32>) -> tensor<32x40x40xf32> + %3108 = 
math.exp %3107 : tensor<32x40x40xf32> + %3109 = tosa.reshape %3106 {new_shape = array} : (tensor<32x40x1xf32>) -> tensor<1x32x40xf32> + %3110 = tosa.reshape %3073 {new_shape = array} : (tensor<1x32x40x128xf32>) -> tensor<32x40x128xf32> + %3111 = tosa.matmul %3108, %3110 : (tensor<32x40x40xf32>, tensor<32x40x128xf32>) -> tensor<32x40x128xf32> + %3112 = tosa.reshape %3111 {new_shape = array} : (tensor<32x40x128xf32>) -> tensor<1x32x40x128xf32> + %3113 = "tosa.const"() <{value = dense<[0, 2, 1, 3]> : tensor<4xi32>}> : () -> tensor<4xi32> + %3114 = tosa.transpose %3112, %3113 : (tensor<1x32x40x128xf32>, tensor<4xi32>) -> tensor<1x40x32x128xf32> + %3115 = tosa.reshape %3114 {new_shape = array} : (tensor<1x40x32x128xf32>) -> tensor<1x40x4096xf32> + %3116 = "tosa.const"() <{value = dense<[1, 0]> : tensor<2xi32>}> : () -> tensor<2xi32> + %3117 = tosa.transpose %arg241, %3116 : (tensor<4096x4096xf32>, tensor<2xi32>) -> tensor<4096x4096xf32> + %3118 = tosa.reshape %3115 {new_shape = array} : (tensor<1x40x4096xf32>) -> tensor<40x4096xf32> + %cst_728 = arith.constant dense<0.000000e+00> : tensor<40x4096xf32> + %3119 = linalg.matmul {cast = #linalg.type_fn} ins(%3118, %3117 : tensor<40x4096xf32>, tensor<4096x4096xf32>) outs(%cst_728 : tensor<40x4096xf32>) -> tensor<40x4096xf32> + %3120 = tosa.reshape %3119 {new_shape = array} : (tensor<40x4096xf32>) -> tensor<1x40x4096xf32> + %3121 = tosa.add %3037, %3120 : (tensor<1x40x4096xf32>, tensor<1x40x4096xf32>) -> tensor<1x40x4096xf32> + %3122 = tensor.empty() : tensor<1x40x4096xf32> + %c2_i32_729 = arith.constant 2 : i32 + %3123 = linalg.generic {indexing_maps = [#map4, #map4], iterator_types = ["parallel", "parallel", "parallel"]} ins(%3121 : tensor<1x40x4096xf32>) outs(%3122 : tensor<1x40x4096xf32>) { + ^bb0(%in: f32, %out: f32): + %3745 = math.fpowi %in, %c2_i32_729 : f32, i32 + linalg.yield %3745 : f32 + } -> tensor<1x40x4096xf32> + %3124 = tosa.reduce_sum %3123 {axis = 2 : i32} : (tensor<1x40x4096xf32>) -> tensor<1x40x1xf32> + 
%3125 = "tosa.const"() <{value = dense<4.096000e+03> : tensor<1xf32>}> : () -> tensor<1xf32> + %3126 = tosa.reciprocal %3125 : (tensor<1xf32>) -> tensor<1xf32> + %3127 = tosa.mul %3126, %3124 {shift = 0 : i8} : (tensor<1xf32>, tensor<1x40x1xf32>) -> tensor<1x40x1xf32> + %3128 = "tosa.const"() <{value = dense<9.99999974E-6> : tensor<1x40x1xf32>}> : () -> tensor<1x40x1xf32> + %3129 = tosa.add %3127, %3128 : (tensor<1x40x1xf32>, tensor<1x40x1xf32>) -> tensor<1x40x1xf32> + %3130 = tosa.rsqrt %3129 : (tensor<1x40x1xf32>) -> tensor<1x40x1xf32> + %3131 = tosa.mul %3121, %3130 {shift = 0 : i8} : (tensor<1x40x4096xf32>, tensor<1x40x1xf32>) -> tensor<1x40x4096xf32> + %3132 = tosa.reshape %arg242 {new_shape = array} : (tensor<4096xf32>) -> tensor<1x1x4096xf32> + %3133 = tosa.mul %3132, %3131 {shift = 0 : i8} : (tensor<1x1x4096xf32>, tensor<1x40x4096xf32>) -> tensor<1x40x4096xf32> + %3134 = "tosa.const"() <{value = dense<[1, 0]> : tensor<2xi32>}> : () -> tensor<2xi32> + %3135 = tosa.transpose %arg243, %3134 : (tensor<11008x4096xf32>, tensor<2xi32>) -> tensor<4096x11008xf32> + %3136 = tosa.reshape %3133 {new_shape = array} : (tensor<1x40x4096xf32>) -> tensor<40x4096xf32> + %cst_730 = arith.constant dense<0.000000e+00> : tensor<40x11008xf32> + %3137 = linalg.matmul {cast = #linalg.type_fn} ins(%3136, %3135 : tensor<40x4096xf32>, tensor<4096x11008xf32>) outs(%cst_730 : tensor<40x11008xf32>) -> tensor<40x11008xf32> + %3138 = tosa.reshape %3137 {new_shape = array} : (tensor<40x11008xf32>) -> tensor<1x40x11008xf32> + %3139 = tosa.sigmoid %3138 : (tensor<1x40x11008xf32>) -> tensor<1x40x11008xf32> + %3140 = tosa.mul %3138, %3139 {shift = 0 : i8} : (tensor<1x40x11008xf32>, tensor<1x40x11008xf32>) -> tensor<1x40x11008xf32> + %3141 = "tosa.const"() <{value = dense<[1, 0]> : tensor<2xi32>}> : () -> tensor<2xi32> + %3142 = tosa.transpose %arg244, %3141 : (tensor<11008x4096xf32>, tensor<2xi32>) -> tensor<4096x11008xf32> + %3143 = tosa.reshape %3133 {new_shape = array} : 
(tensor<1x40x4096xf32>) -> tensor<40x4096xf32> + %cst_731 = arith.constant dense<0.000000e+00> : tensor<40x11008xf32> + %3144 = linalg.matmul {cast = #linalg.type_fn} ins(%3143, %3142 : tensor<40x4096xf32>, tensor<4096x11008xf32>) outs(%cst_731 : tensor<40x11008xf32>) -> tensor<40x11008xf32> + %3145 = tosa.reshape %3144 {new_shape = array} : (tensor<40x11008xf32>) -> tensor<1x40x11008xf32> + %3146 = tosa.mul %3140, %3145 {shift = 0 : i8} : (tensor<1x40x11008xf32>, tensor<1x40x11008xf32>) -> tensor<1x40x11008xf32> + %3147 = "tosa.const"() <{value = dense<[1, 0]> : tensor<2xi32>}> : () -> tensor<2xi32> + %3148 = tosa.transpose %arg245, %3147 : (tensor<4096x11008xf32>, tensor<2xi32>) -> tensor<11008x4096xf32> + %3149 = tosa.reshape %3146 {new_shape = array} : (tensor<1x40x11008xf32>) -> tensor<40x11008xf32> + %cst_732 = arith.constant dense<0.000000e+00> : tensor<40x4096xf32> + %3150 = linalg.matmul {cast = #linalg.type_fn} ins(%3149, %3148 : tensor<40x11008xf32>, tensor<11008x4096xf32>) outs(%cst_732 : tensor<40x4096xf32>) -> tensor<40x4096xf32> + %3151 = tosa.reshape %3150 {new_shape = array} : (tensor<40x4096xf32>) -> tensor<1x40x4096xf32> + %3152 = tosa.add %3121, %3151 : (tensor<1x40x4096xf32>, tensor<1x40x4096xf32>) -> tensor<1x40x4096xf32> + %3153 = tensor.empty() : tensor<1x40x4096xf32> + %c2_i32_733 = arith.constant 2 : i32 + %3154 = linalg.generic {indexing_maps = [#map4, #map4], iterator_types = ["parallel", "parallel", "parallel"]} ins(%3152 : tensor<1x40x4096xf32>) outs(%3153 : tensor<1x40x4096xf32>) { + ^bb0(%in: f32, %out: f32): + %3745 = math.fpowi %in, %c2_i32_733 : f32, i32 + linalg.yield %3745 : f32 + } -> tensor<1x40x4096xf32> + %3155 = tosa.reduce_sum %3154 {axis = 2 : i32} : (tensor<1x40x4096xf32>) -> tensor<1x40x1xf32> + %3156 = "tosa.const"() <{value = dense<4.096000e+03> : tensor<1xf32>}> : () -> tensor<1xf32> + %3157 = tosa.reciprocal %3156 : (tensor<1xf32>) -> tensor<1xf32> + %3158 = tosa.mul %3157, %3155 {shift = 0 : i8} : (tensor<1xf32>, 
tensor<1x40x1xf32>) -> tensor<1x40x1xf32> + %3159 = "tosa.const"() <{value = dense<9.99999974E-6> : tensor<1x40x1xf32>}> : () -> tensor<1x40x1xf32> + %3160 = tosa.add %3158, %3159 : (tensor<1x40x1xf32>, tensor<1x40x1xf32>) -> tensor<1x40x1xf32> + %3161 = tosa.rsqrt %3160 : (tensor<1x40x1xf32>) -> tensor<1x40x1xf32> + %3162 = tosa.mul %3152, %3161 {shift = 0 : i8} : (tensor<1x40x4096xf32>, tensor<1x40x1xf32>) -> tensor<1x40x4096xf32> + %3163 = tosa.reshape %arg246 {new_shape = array} : (tensor<4096xf32>) -> tensor<1x1x4096xf32> + %3164 = tosa.mul %3163, %3162 {shift = 0 : i8} : (tensor<1x1x4096xf32>, tensor<1x40x4096xf32>) -> tensor<1x40x4096xf32> + %3165 = "tosa.const"() <{value = dense<[1, 0]> : tensor<2xi32>}> : () -> tensor<2xi32> + %3166 = tosa.transpose %arg247, %3165 : (tensor<4096x4096xf32>, tensor<2xi32>) -> tensor<4096x4096xf32> + %3167 = tosa.reshape %3164 {new_shape = array} : (tensor<1x40x4096xf32>) -> tensor<40x4096xf32> + %cst_734 = arith.constant dense<0.000000e+00> : tensor<40x4096xf32> + %3168 = linalg.matmul {cast = #linalg.type_fn} ins(%3167, %3166 : tensor<40x4096xf32>, tensor<4096x4096xf32>) outs(%cst_734 : tensor<40x4096xf32>) -> tensor<40x4096xf32> + %3169 = tosa.reshape %3168 {new_shape = array} : (tensor<40x4096xf32>) -> tensor<1x40x4096xf32> + %3170 = "tosa.const"() <{value = dense<[1, 0]> : tensor<2xi32>}> : () -> tensor<2xi32> + %3171 = tosa.transpose %arg248, %3170 : (tensor<4096x4096xf32>, tensor<2xi32>) -> tensor<4096x4096xf32> + %3172 = tosa.reshape %3164 {new_shape = array} : (tensor<1x40x4096xf32>) -> tensor<40x4096xf32> + %cst_735 = arith.constant dense<0.000000e+00> : tensor<40x4096xf32> + %3173 = linalg.matmul {cast = #linalg.type_fn} ins(%3172, %3171 : tensor<40x4096xf32>, tensor<4096x4096xf32>) outs(%cst_735 : tensor<40x4096xf32>) -> tensor<40x4096xf32> + %3174 = tosa.reshape %3173 {new_shape = array} : (tensor<40x4096xf32>) -> tensor<1x40x4096xf32> + %3175 = "tosa.const"() <{value = dense<[1, 0]> : tensor<2xi32>}> : () -> 
tensor<2xi32> + %3176 = tosa.transpose %arg249, %3175 : (tensor<4096x4096xf32>, tensor<2xi32>) -> tensor<4096x4096xf32> + %3177 = tosa.reshape %3164 {new_shape = array} : (tensor<1x40x4096xf32>) -> tensor<40x4096xf32> + %cst_736 = arith.constant dense<0.000000e+00> : tensor<40x4096xf32> + %3178 = linalg.matmul {cast = #linalg.type_fn} ins(%3177, %3176 : tensor<40x4096xf32>, tensor<4096x4096xf32>) outs(%cst_736 : tensor<40x4096xf32>) -> tensor<40x4096xf32> + %3179 = tosa.reshape %3178 {new_shape = array} : (tensor<40x4096xf32>) -> tensor<1x40x4096xf32> + %3180 = tosa.reshape %3169 {new_shape = array} : (tensor<1x40x4096xf32>) -> tensor<1x40x32x128xf32> + %3181 = "tosa.const"() <{value = dense<[0, 2, 1, 3]> : tensor<4xi32>}> : () -> tensor<4xi32> + %3182 = tosa.transpose %3180, %3181 : (tensor<1x40x32x128xf32>, tensor<4xi32>) -> tensor<1x32x40x128xf32> + %3183 = tosa.reshape %3174 {new_shape = array} : (tensor<1x40x4096xf32>) -> tensor<1x40x32x128xf32> + %3184 = "tosa.const"() <{value = dense<[0, 2, 1, 3]> : tensor<4xi32>}> : () -> tensor<4xi32> + %3185 = tosa.transpose %3183, %3184 : (tensor<1x40x32x128xf32>, tensor<4xi32>) -> tensor<1x32x40x128xf32> + %3186 = tosa.reshape %3179 {new_shape = array} : (tensor<1x40x4096xf32>) -> tensor<1x40x32x128xf32> + %3187 = "tosa.const"() <{value = dense<[0, 2, 1, 3]> : tensor<4xi32>}> : () -> tensor<4xi32> + %3188 = tosa.transpose %3186, %3187 : (tensor<1x40x32x128xf32>, tensor<4xi32>) -> tensor<1x32x40x128xf32> + %3189 = tosa.reshape %45 {new_shape = array} : (tensor<1x40x128xf32>) -> tensor<1x1x40x128xf32> + %3190 = tosa.reshape %47 {new_shape = array} : (tensor<1x40x128xf32>) -> tensor<1x1x40x128xf32> + %3191 = tosa.mul %3182, %3189 {shift = 0 : i8} : (tensor<1x32x40x128xf32>, tensor<1x1x40x128xf32>) -> tensor<1x32x40x128xf32> + %extracted_slice_737 = tensor.extract_slice %3182[0, 0, 0, 0] [1, 32, 40, 64] [1, 1, 1, 1] : tensor<1x32x40x128xf32> to tensor<1x32x40x64xf32> + %extracted_slice_738 = tensor.extract_slice %3182[0, 0, 
0, 64] [1, 32, 40, 64] [1, 1, 1, 1] : tensor<1x32x40x128xf32> to tensor<1x32x40x64xf32> + %3192 = tensor.empty() : tensor<1x32x40x64xf32> + %3193 = linalg.negf ins(%extracted_slice_738 : tensor<1x32x40x64xf32>) outs(%3192 : tensor<1x32x40x64xf32>) -> tensor<1x32x40x64xf32> + %3194 = tensor.empty() : tensor<1x32x40x128xf32> + %inserted_slice_739 = tensor.insert_slice %3193 into %3194[0, 0, 0, 0] [1, 32, 40, 64] [1, 1, 1, 1] : tensor<1x32x40x64xf32> into tensor<1x32x40x128xf32> + %inserted_slice_740 = tensor.insert_slice %extracted_slice_737 into %inserted_slice_739[0, 0, 0, 64] [1, 32, 40, 64] [1, 1, 1, 1] : tensor<1x32x40x64xf32> into tensor<1x32x40x128xf32> + %3195 = tosa.mul %inserted_slice_740, %3190 {shift = 0 : i8} : (tensor<1x32x40x128xf32>, tensor<1x1x40x128xf32>) -> tensor<1x32x40x128xf32> + %3196 = tosa.add %3191, %3195 : (tensor<1x32x40x128xf32>, tensor<1x32x40x128xf32>) -> tensor<1x32x40x128xf32> + %3197 = tosa.mul %3185, %3189 {shift = 0 : i8} : (tensor<1x32x40x128xf32>, tensor<1x1x40x128xf32>) -> tensor<1x32x40x128xf32> + %extracted_slice_741 = tensor.extract_slice %3185[0, 0, 0, 0] [1, 32, 40, 64] [1, 1, 1, 1] : tensor<1x32x40x128xf32> to tensor<1x32x40x64xf32> + %extracted_slice_742 = tensor.extract_slice %3185[0, 0, 0, 64] [1, 32, 40, 64] [1, 1, 1, 1] : tensor<1x32x40x128xf32> to tensor<1x32x40x64xf32> + %3198 = tensor.empty() : tensor<1x32x40x64xf32> + %3199 = linalg.negf ins(%extracted_slice_742 : tensor<1x32x40x64xf32>) outs(%3198 : tensor<1x32x40x64xf32>) -> tensor<1x32x40x64xf32> + %3200 = tensor.empty() : tensor<1x32x40x128xf32> + %inserted_slice_743 = tensor.insert_slice %3199 into %3200[0, 0, 0, 0] [1, 32, 40, 64] [1, 1, 1, 1] : tensor<1x32x40x64xf32> into tensor<1x32x40x128xf32> + %inserted_slice_744 = tensor.insert_slice %extracted_slice_741 into %inserted_slice_743[0, 0, 0, 64] [1, 32, 40, 64] [1, 1, 1, 1] : tensor<1x32x40x64xf32> into tensor<1x32x40x128xf32> + %3201 = tosa.mul %inserted_slice_744, %3190 {shift = 0 : i8} : 
(tensor<1x32x40x128xf32>, tensor<1x1x40x128xf32>) -> tensor<1x32x40x128xf32> + %3202 = tosa.add %3197, %3201 : (tensor<1x32x40x128xf32>, tensor<1x32x40x128xf32>) -> tensor<1x32x40x128xf32> + %3203 = tosa.reshape %19 {new_shape = array} : (tensor<40x41xf32>) -> tensor<1x40x41xf32> + %3204 = tosa.reshape %3203 {new_shape = array} : (tensor<1x40x41xf32>) -> tensor<1x1x40x41xf32> + %extracted_slice_745 = tensor.extract_slice %3204[0, 0, 0, 0] [1, 1, 40, 41] [1, 1, 1, 1] : tensor<1x1x40x41xf32> to tensor<1x1x40x41xf32> + %extracted_slice_746 = tensor.extract_slice %extracted_slice_745[0, 0, 0, 0] [1, 1, 40, 41] [1, 1, 1, 1] : tensor<1x1x40x41xf32> to tensor<1x1x40x41xf32> + %3205 = "tosa.const"() <{value = dense<0.000000e+00> : tensor<1x1x40x41xf32>}> : () -> tensor<1x1x40x41xf32> + %3206 = tosa.add %extracted_slice_746, %3205 : (tensor<1x1x40x41xf32>, tensor<1x1x40x41xf32>) -> tensor<1x1x40x41xf32> + %extracted_slice_747 = tensor.extract_slice %3206[0, 0, 0, 0] [1, 1, 40, 41] [1, 1, 1, 1] : tensor<1x1x40x41xf32> to tensor<1x1x40x41xf32> + %extracted_slice_748 = tensor.extract_slice %extracted_slice_747[0, 0, 0, 0] [1, 1, 40, 41] [1, 1, 1, 1] : tensor<1x1x40x41xf32> to tensor<1x1x40x41xf32> + %extracted_slice_749 = tensor.extract_slice %extracted_slice_748[0, 0, 0, 0] [1, 1, 40, 41] [1, 1, 1, 1] : tensor<1x1x40x41xf32> to tensor<1x1x40x41xf32> + %extracted_slice_750 = tensor.extract_slice %extracted_slice_749[0, 0, 0, 0] [1, 1, 40, 40] [1, 1, 1, 1] : tensor<1x1x40x41xf32> to tensor<1x1x40x40xf32> + %cst_751 = arith.constant 0.000000e+00 : f32 + %splat_752 = tensor.splat %cst_751 : tensor<40x40xf32> + %3207 = tosa.reshape %extracted_slice_750 {new_shape = array} : (tensor<1x1x40x40xf32>) -> tensor<40x40xf32> + %3208 = tosa.add %splat_752, %3207 : (tensor<40x40xf32>, tensor<40x40xf32>) -> tensor<40x40xf32> + %3209 = "tosa.const"() <{value = dense<[0, 1, 3, 2]> : tensor<4xi32>}> : () -> tensor<4xi32> + %3210 = tosa.transpose %3202, %3209 : (tensor<1x32x40x128xf32>, 
tensor<4xi32>) -> tensor<1x32x128x40xf32> + %3211 = tosa.reshape %3196 {new_shape = array} : (tensor<1x32x40x128xf32>) -> tensor<32x40x128xf32> + %3212 = tosa.reshape %3210 {new_shape = array} : (tensor<1x32x128x40xf32>) -> tensor<32x128x40xf32> + %3213 = tosa.matmul %3211, %3212 : (tensor<32x40x128xf32>, tensor<32x128x40xf32>) -> tensor<32x40x40xf32> + %cst_753 = arith.constant 0.0883883461 : f32 + %splat_754 = tensor.splat %cst_753 : tensor<32x40x40xf32> + %3214 = tosa.mul %3213, %splat_754 {shift = 0 : i8} : (tensor<32x40x40xf32>, tensor<32x40x40xf32>) -> tensor<32x40x40xf32> + %3215 = tosa.add %3214, %3208 : (tensor<32x40x40xf32>, tensor<40x40xf32>) -> tensor<32x40x40xf32> + %3216 = tosa.reduce_max %3215 {axis = 2 : i32} : (tensor<32x40x40xf32>) -> tensor<32x40x1xf32> + %3217 = tosa.sub %3215, %3216 : (tensor<32x40x40xf32>, tensor<32x40x1xf32>) -> tensor<32x40x40xf32> + %3218 = math.exp %3217 : tensor<32x40x40xf32> + %3219 = tosa.reduce_sum %3218 {axis = 2 : i32} : (tensor<32x40x40xf32>) -> tensor<32x40x1xf32> + %3220 = tosa.log %3219 : (tensor<32x40x1xf32>) -> tensor<32x40x1xf32> + %3221 = tosa.add %3216, %3220 : (tensor<32x40x1xf32>, tensor<32x40x1xf32>) -> tensor<32x40x1xf32> + %3222 = tosa.sub %3215, %3221 : (tensor<32x40x40xf32>, tensor<32x40x1xf32>) -> tensor<32x40x40xf32> + %3223 = math.exp %3222 : tensor<32x40x40xf32> + %3224 = tosa.reshape %3221 {new_shape = array} : (tensor<32x40x1xf32>) -> tensor<1x32x40xf32> + %3225 = tosa.reshape %3188 {new_shape = array} : (tensor<1x32x40x128xf32>) -> tensor<32x40x128xf32> + %3226 = tosa.matmul %3223, %3225 : (tensor<32x40x40xf32>, tensor<32x40x128xf32>) -> tensor<32x40x128xf32> + %3227 = tosa.reshape %3226 {new_shape = array} : (tensor<32x40x128xf32>) -> tensor<1x32x40x128xf32> + %3228 = "tosa.const"() <{value = dense<[0, 2, 1, 3]> : tensor<4xi32>}> : () -> tensor<4xi32> + %3229 = tosa.transpose %3227, %3228 : (tensor<1x32x40x128xf32>, tensor<4xi32>) -> tensor<1x40x32x128xf32> + %3230 = tosa.reshape %3229 
{new_shape = array} : (tensor<1x40x32x128xf32>) -> tensor<1x40x4096xf32> + %3231 = "tosa.const"() <{value = dense<[1, 0]> : tensor<2xi32>}> : () -> tensor<2xi32> + %3232 = tosa.transpose %arg250, %3231 : (tensor<4096x4096xf32>, tensor<2xi32>) -> tensor<4096x4096xf32> + %3233 = tosa.reshape %3230 {new_shape = array} : (tensor<1x40x4096xf32>) -> tensor<40x4096xf32> + %cst_755 = arith.constant dense<0.000000e+00> : tensor<40x4096xf32> + %3234 = linalg.matmul {cast = #linalg.type_fn} ins(%3233, %3232 : tensor<40x4096xf32>, tensor<4096x4096xf32>) outs(%cst_755 : tensor<40x4096xf32>) -> tensor<40x4096xf32> + %3235 = tosa.reshape %3234 {new_shape = array} : (tensor<40x4096xf32>) -> tensor<1x40x4096xf32> + %3236 = tosa.add %3152, %3235 : (tensor<1x40x4096xf32>, tensor<1x40x4096xf32>) -> tensor<1x40x4096xf32> + %3237 = tensor.empty() : tensor<1x40x4096xf32> + %c2_i32_756 = arith.constant 2 : i32 + %3238 = linalg.generic {indexing_maps = [#map4, #map4], iterator_types = ["parallel", "parallel", "parallel"]} ins(%3236 : tensor<1x40x4096xf32>) outs(%3237 : tensor<1x40x4096xf32>) { + ^bb0(%in: f32, %out: f32): + %3745 = math.fpowi %in, %c2_i32_756 : f32, i32 + linalg.yield %3745 : f32 + } -> tensor<1x40x4096xf32> + %3239 = tosa.reduce_sum %3238 {axis = 2 : i32} : (tensor<1x40x4096xf32>) -> tensor<1x40x1xf32> + %3240 = "tosa.const"() <{value = dense<4.096000e+03> : tensor<1xf32>}> : () -> tensor<1xf32> + %3241 = tosa.reciprocal %3240 : (tensor<1xf32>) -> tensor<1xf32> + %3242 = tosa.mul %3241, %3239 {shift = 0 : i8} : (tensor<1xf32>, tensor<1x40x1xf32>) -> tensor<1x40x1xf32> + %3243 = "tosa.const"() <{value = dense<9.99999974E-6> : tensor<1x40x1xf32>}> : () -> tensor<1x40x1xf32> + %3244 = tosa.add %3242, %3243 : (tensor<1x40x1xf32>, tensor<1x40x1xf32>) -> tensor<1x40x1xf32> + %3245 = tosa.rsqrt %3244 : (tensor<1x40x1xf32>) -> tensor<1x40x1xf32> + %3246 = tosa.mul %3236, %3245 {shift = 0 : i8} : (tensor<1x40x4096xf32>, tensor<1x40x1xf32>) -> tensor<1x40x4096xf32> + %3247 = 
tosa.reshape %arg251 {new_shape = array} : (tensor<4096xf32>) -> tensor<1x1x4096xf32> + %3248 = tosa.mul %3247, %3246 {shift = 0 : i8} : (tensor<1x1x4096xf32>, tensor<1x40x4096xf32>) -> tensor<1x40x4096xf32> + %3249 = "tosa.const"() <{value = dense<[1, 0]> : tensor<2xi32>}> : () -> tensor<2xi32> + %3250 = tosa.transpose %arg252, %3249 : (tensor<11008x4096xf32>, tensor<2xi32>) -> tensor<4096x11008xf32> + %3251 = tosa.reshape %3248 {new_shape = array} : (tensor<1x40x4096xf32>) -> tensor<40x4096xf32> + %cst_757 = arith.constant dense<0.000000e+00> : tensor<40x11008xf32> + %3252 = linalg.matmul {cast = #linalg.type_fn} ins(%3251, %3250 : tensor<40x4096xf32>, tensor<4096x11008xf32>) outs(%cst_757 : tensor<40x11008xf32>) -> tensor<40x11008xf32> + %3253 = tosa.reshape %3252 {new_shape = array} : (tensor<40x11008xf32>) -> tensor<1x40x11008xf32> + %3254 = tosa.sigmoid %3253 : (tensor<1x40x11008xf32>) -> tensor<1x40x11008xf32> + %3255 = tosa.mul %3253, %3254 {shift = 0 : i8} : (tensor<1x40x11008xf32>, tensor<1x40x11008xf32>) -> tensor<1x40x11008xf32> + %3256 = "tosa.const"() <{value = dense<[1, 0]> : tensor<2xi32>}> : () -> tensor<2xi32> + %3257 = tosa.transpose %arg253, %3256 : (tensor<11008x4096xf32>, tensor<2xi32>) -> tensor<4096x11008xf32> + %3258 = tosa.reshape %3248 {new_shape = array} : (tensor<1x40x4096xf32>) -> tensor<40x4096xf32> + %cst_758 = arith.constant dense<0.000000e+00> : tensor<40x11008xf32> + %3259 = linalg.matmul {cast = #linalg.type_fn} ins(%3258, %3257 : tensor<40x4096xf32>, tensor<4096x11008xf32>) outs(%cst_758 : tensor<40x11008xf32>) -> tensor<40x11008xf32> + %3260 = tosa.reshape %3259 {new_shape = array} : (tensor<40x11008xf32>) -> tensor<1x40x11008xf32> + %3261 = tosa.mul %3255, %3260 {shift = 0 : i8} : (tensor<1x40x11008xf32>, tensor<1x40x11008xf32>) -> tensor<1x40x11008xf32> + %3262 = "tosa.const"() <{value = dense<[1, 0]> : tensor<2xi32>}> : () -> tensor<2xi32> + %3263 = tosa.transpose %arg254, %3262 : (tensor<4096x11008xf32>, tensor<2xi32>) -> 
tensor<11008x4096xf32> + %3264 = tosa.reshape %3261 {new_shape = array} : (tensor<1x40x11008xf32>) -> tensor<40x11008xf32> + %cst_759 = arith.constant dense<0.000000e+00> : tensor<40x4096xf32> + %3265 = linalg.matmul {cast = #linalg.type_fn} ins(%3264, %3263 : tensor<40x11008xf32>, tensor<11008x4096xf32>) outs(%cst_759 : tensor<40x4096xf32>) -> tensor<40x4096xf32> + %3266 = tosa.reshape %3265 {new_shape = array} : (tensor<40x4096xf32>) -> tensor<1x40x4096xf32> + %3267 = tosa.add %3236, %3266 : (tensor<1x40x4096xf32>, tensor<1x40x4096xf32>) -> tensor<1x40x4096xf32> + %3268 = tensor.empty() : tensor<1x40x4096xf32> + %c2_i32_760 = arith.constant 2 : i32 + %3269 = linalg.generic {indexing_maps = [#map4, #map4], iterator_types = ["parallel", "parallel", "parallel"]} ins(%3267 : tensor<1x40x4096xf32>) outs(%3268 : tensor<1x40x4096xf32>) { + ^bb0(%in: f32, %out: f32): + %3745 = math.fpowi %in, %c2_i32_760 : f32, i32 + linalg.yield %3745 : f32 + } -> tensor<1x40x4096xf32> + %3270 = tosa.reduce_sum %3269 {axis = 2 : i32} : (tensor<1x40x4096xf32>) -> tensor<1x40x1xf32> + %3271 = "tosa.const"() <{value = dense<4.096000e+03> : tensor<1xf32>}> : () -> tensor<1xf32> + %3272 = tosa.reciprocal %3271 : (tensor<1xf32>) -> tensor<1xf32> + %3273 = tosa.mul %3272, %3270 {shift = 0 : i8} : (tensor<1xf32>, tensor<1x40x1xf32>) -> tensor<1x40x1xf32> + %3274 = "tosa.const"() <{value = dense<9.99999974E-6> : tensor<1x40x1xf32>}> : () -> tensor<1x40x1xf32> + %3275 = tosa.add %3273, %3274 : (tensor<1x40x1xf32>, tensor<1x40x1xf32>) -> tensor<1x40x1xf32> + %3276 = tosa.rsqrt %3275 : (tensor<1x40x1xf32>) -> tensor<1x40x1xf32> + %3277 = tosa.mul %3267, %3276 {shift = 0 : i8} : (tensor<1x40x4096xf32>, tensor<1x40x1xf32>) -> tensor<1x40x4096xf32> + %3278 = tosa.reshape %arg255 {new_shape = array} : (tensor<4096xf32>) -> tensor<1x1x4096xf32> + %3279 = tosa.mul %3278, %3277 {shift = 0 : i8} : (tensor<1x1x4096xf32>, tensor<1x40x4096xf32>) -> tensor<1x40x4096xf32> + %3280 = "tosa.const"() <{value = 
dense<[1, 0]> : tensor<2xi32>}> : () -> tensor<2xi32> + %3281 = tosa.transpose %arg256, %3280 : (tensor<4096x4096xf32>, tensor<2xi32>) -> tensor<4096x4096xf32> + %3282 = tosa.reshape %3279 {new_shape = array} : (tensor<1x40x4096xf32>) -> tensor<40x4096xf32> + %cst_761 = arith.constant dense<0.000000e+00> : tensor<40x4096xf32> + %3283 = linalg.matmul {cast = #linalg.type_fn} ins(%3282, %3281 : tensor<40x4096xf32>, tensor<4096x4096xf32>) outs(%cst_761 : tensor<40x4096xf32>) -> tensor<40x4096xf32> + %3284 = tosa.reshape %3283 {new_shape = array} : (tensor<40x4096xf32>) -> tensor<1x40x4096xf32> + %3285 = "tosa.const"() <{value = dense<[1, 0]> : tensor<2xi32>}> : () -> tensor<2xi32> + %3286 = tosa.transpose %arg257, %3285 : (tensor<4096x4096xf32>, tensor<2xi32>) -> tensor<4096x4096xf32> + %3287 = tosa.reshape %3279 {new_shape = array} : (tensor<1x40x4096xf32>) -> tensor<40x4096xf32> + %cst_762 = arith.constant dense<0.000000e+00> : tensor<40x4096xf32> + %3288 = linalg.matmul {cast = #linalg.type_fn} ins(%3287, %3286 : tensor<40x4096xf32>, tensor<4096x4096xf32>) outs(%cst_762 : tensor<40x4096xf32>) -> tensor<40x4096xf32> + %3289 = tosa.reshape %3288 {new_shape = array} : (tensor<40x4096xf32>) -> tensor<1x40x4096xf32> + %3290 = "tosa.const"() <{value = dense<[1, 0]> : tensor<2xi32>}> : () -> tensor<2xi32> + %3291 = tosa.transpose %arg258, %3290 : (tensor<4096x4096xf32>, tensor<2xi32>) -> tensor<4096x4096xf32> + %3292 = tosa.reshape %3279 {new_shape = array} : (tensor<1x40x4096xf32>) -> tensor<40x4096xf32> + %cst_763 = arith.constant dense<0.000000e+00> : tensor<40x4096xf32> + %3293 = linalg.matmul {cast = #linalg.type_fn} ins(%3292, %3291 : tensor<40x4096xf32>, tensor<4096x4096xf32>) outs(%cst_763 : tensor<40x4096xf32>) -> tensor<40x4096xf32> + %3294 = tosa.reshape %3293 {new_shape = array} : (tensor<40x4096xf32>) -> tensor<1x40x4096xf32> + %3295 = tosa.reshape %3284 {new_shape = array} : (tensor<1x40x4096xf32>) -> tensor<1x40x32x128xf32> + %3296 = "tosa.const"() <{value 
= dense<[0, 2, 1, 3]> : tensor<4xi32>}> : () -> tensor<4xi32> + %3297 = tosa.transpose %3295, %3296 : (tensor<1x40x32x128xf32>, tensor<4xi32>) -> tensor<1x32x40x128xf32> + %3298 = tosa.reshape %3289 {new_shape = array} : (tensor<1x40x4096xf32>) -> tensor<1x40x32x128xf32> + %3299 = "tosa.const"() <{value = dense<[0, 2, 1, 3]> : tensor<4xi32>}> : () -> tensor<4xi32> + %3300 = tosa.transpose %3298, %3299 : (tensor<1x40x32x128xf32>, tensor<4xi32>) -> tensor<1x32x40x128xf32> + %3301 = tosa.reshape %3294 {new_shape = array} : (tensor<1x40x4096xf32>) -> tensor<1x40x32x128xf32> + %3302 = "tosa.const"() <{value = dense<[0, 2, 1, 3]> : tensor<4xi32>}> : () -> tensor<4xi32> + %3303 = tosa.transpose %3301, %3302 : (tensor<1x40x32x128xf32>, tensor<4xi32>) -> tensor<1x32x40x128xf32> + %3304 = tosa.reshape %45 {new_shape = array} : (tensor<1x40x128xf32>) -> tensor<1x1x40x128xf32> + %3305 = tosa.reshape %47 {new_shape = array} : (tensor<1x40x128xf32>) -> tensor<1x1x40x128xf32> + %3306 = tosa.mul %3297, %3304 {shift = 0 : i8} : (tensor<1x32x40x128xf32>, tensor<1x1x40x128xf32>) -> tensor<1x32x40x128xf32> + %extracted_slice_764 = tensor.extract_slice %3297[0, 0, 0, 0] [1, 32, 40, 64] [1, 1, 1, 1] : tensor<1x32x40x128xf32> to tensor<1x32x40x64xf32> + %extracted_slice_765 = tensor.extract_slice %3297[0, 0, 0, 64] [1, 32, 40, 64] [1, 1, 1, 1] : tensor<1x32x40x128xf32> to tensor<1x32x40x64xf32> + %3307 = tensor.empty() : tensor<1x32x40x64xf32> + %3308 = linalg.negf ins(%extracted_slice_765 : tensor<1x32x40x64xf32>) outs(%3307 : tensor<1x32x40x64xf32>) -> tensor<1x32x40x64xf32> + %3309 = tensor.empty() : tensor<1x32x40x128xf32> + %inserted_slice_766 = tensor.insert_slice %3308 into %3309[0, 0, 0, 0] [1, 32, 40, 64] [1, 1, 1, 1] : tensor<1x32x40x64xf32> into tensor<1x32x40x128xf32> + %inserted_slice_767 = tensor.insert_slice %extracted_slice_764 into %inserted_slice_766[0, 0, 0, 64] [1, 32, 40, 64] [1, 1, 1, 1] : tensor<1x32x40x64xf32> into tensor<1x32x40x128xf32> + %3310 = tosa.mul 
%inserted_slice_767, %3305 {shift = 0 : i8} : (tensor<1x32x40x128xf32>, tensor<1x1x40x128xf32>) -> tensor<1x32x40x128xf32> + %3311 = tosa.add %3306, %3310 : (tensor<1x32x40x128xf32>, tensor<1x32x40x128xf32>) -> tensor<1x32x40x128xf32> + %3312 = tosa.mul %3300, %3304 {shift = 0 : i8} : (tensor<1x32x40x128xf32>, tensor<1x1x40x128xf32>) -> tensor<1x32x40x128xf32> + %extracted_slice_768 = tensor.extract_slice %3300[0, 0, 0, 0] [1, 32, 40, 64] [1, 1, 1, 1] : tensor<1x32x40x128xf32> to tensor<1x32x40x64xf32> + %extracted_slice_769 = tensor.extract_slice %3300[0, 0, 0, 64] [1, 32, 40, 64] [1, 1, 1, 1] : tensor<1x32x40x128xf32> to tensor<1x32x40x64xf32> + %3313 = tensor.empty() : tensor<1x32x40x64xf32> + %3314 = linalg.negf ins(%extracted_slice_769 : tensor<1x32x40x64xf32>) outs(%3313 : tensor<1x32x40x64xf32>) -> tensor<1x32x40x64xf32> + %3315 = tensor.empty() : tensor<1x32x40x128xf32> + %inserted_slice_770 = tensor.insert_slice %3314 into %3315[0, 0, 0, 0] [1, 32, 40, 64] [1, 1, 1, 1] : tensor<1x32x40x64xf32> into tensor<1x32x40x128xf32> + %inserted_slice_771 = tensor.insert_slice %extracted_slice_768 into %inserted_slice_770[0, 0, 0, 64] [1, 32, 40, 64] [1, 1, 1, 1] : tensor<1x32x40x64xf32> into tensor<1x32x40x128xf32> + %3316 = tosa.mul %inserted_slice_771, %3305 {shift = 0 : i8} : (tensor<1x32x40x128xf32>, tensor<1x1x40x128xf32>) -> tensor<1x32x40x128xf32> + %3317 = tosa.add %3312, %3316 : (tensor<1x32x40x128xf32>, tensor<1x32x40x128xf32>) -> tensor<1x32x40x128xf32> + %3318 = tosa.reshape %19 {new_shape = array} : (tensor<40x41xf32>) -> tensor<1x40x41xf32> + %3319 = tosa.reshape %3318 {new_shape = array} : (tensor<1x40x41xf32>) -> tensor<1x1x40x41xf32> + %extracted_slice_772 = tensor.extract_slice %3319[0, 0, 0, 0] [1, 1, 40, 41] [1, 1, 1, 1] : tensor<1x1x40x41xf32> to tensor<1x1x40x41xf32> + %extracted_slice_773 = tensor.extract_slice %extracted_slice_772[0, 0, 0, 0] [1, 1, 40, 41] [1, 1, 1, 1] : tensor<1x1x40x41xf32> to tensor<1x1x40x41xf32> + %3320 = "tosa.const"() 
<{value = dense<0.000000e+00> : tensor<1x1x40x41xf32>}> : () -> tensor<1x1x40x41xf32> + %3321 = tosa.add %extracted_slice_773, %3320 : (tensor<1x1x40x41xf32>, tensor<1x1x40x41xf32>) -> tensor<1x1x40x41xf32> + %extracted_slice_774 = tensor.extract_slice %3321[0, 0, 0, 0] [1, 1, 40, 41] [1, 1, 1, 1] : tensor<1x1x40x41xf32> to tensor<1x1x40x41xf32> + %extracted_slice_775 = tensor.extract_slice %extracted_slice_774[0, 0, 0, 0] [1, 1, 40, 41] [1, 1, 1, 1] : tensor<1x1x40x41xf32> to tensor<1x1x40x41xf32> + %extracted_slice_776 = tensor.extract_slice %extracted_slice_775[0, 0, 0, 0] [1, 1, 40, 41] [1, 1, 1, 1] : tensor<1x1x40x41xf32> to tensor<1x1x40x41xf32> + %extracted_slice_777 = tensor.extract_slice %extracted_slice_776[0, 0, 0, 0] [1, 1, 40, 40] [1, 1, 1, 1] : tensor<1x1x40x41xf32> to tensor<1x1x40x40xf32> + %cst_778 = arith.constant 0.000000e+00 : f32 + %splat_779 = tensor.splat %cst_778 : tensor<40x40xf32> + %3322 = tosa.reshape %extracted_slice_777 {new_shape = array} : (tensor<1x1x40x40xf32>) -> tensor<40x40xf32> + %3323 = tosa.add %splat_779, %3322 : (tensor<40x40xf32>, tensor<40x40xf32>) -> tensor<40x40xf32> + %3324 = "tosa.const"() <{value = dense<[0, 1, 3, 2]> : tensor<4xi32>}> : () -> tensor<4xi32> + %3325 = tosa.transpose %3317, %3324 : (tensor<1x32x40x128xf32>, tensor<4xi32>) -> tensor<1x32x128x40xf32> + %3326 = tosa.reshape %3311 {new_shape = array} : (tensor<1x32x40x128xf32>) -> tensor<32x40x128xf32> + %3327 = tosa.reshape %3325 {new_shape = array} : (tensor<1x32x128x40xf32>) -> tensor<32x128x40xf32> + %3328 = tosa.matmul %3326, %3327 : (tensor<32x40x128xf32>, tensor<32x128x40xf32>) -> tensor<32x40x40xf32> + %cst_780 = arith.constant 0.0883883461 : f32 + %splat_781 = tensor.splat %cst_780 : tensor<32x40x40xf32> + %3329 = tosa.mul %3328, %splat_781 {shift = 0 : i8} : (tensor<32x40x40xf32>, tensor<32x40x40xf32>) -> tensor<32x40x40xf32> + %3330 = tosa.add %3329, %3323 : (tensor<32x40x40xf32>, tensor<40x40xf32>) -> tensor<32x40x40xf32> + %3331 = 
tosa.reduce_max %3330 {axis = 2 : i32} : (tensor<32x40x40xf32>) -> tensor<32x40x1xf32> + %3332 = tosa.sub %3330, %3331 : (tensor<32x40x40xf32>, tensor<32x40x1xf32>) -> tensor<32x40x40xf32> + %3333 = math.exp %3332 : tensor<32x40x40xf32> + %3334 = tosa.reduce_sum %3333 {axis = 2 : i32} : (tensor<32x40x40xf32>) -> tensor<32x40x1xf32> + %3335 = tosa.log %3334 : (tensor<32x40x1xf32>) -> tensor<32x40x1xf32> + %3336 = tosa.add %3331, %3335 : (tensor<32x40x1xf32>, tensor<32x40x1xf32>) -> tensor<32x40x1xf32> + %3337 = tosa.sub %3330, %3336 : (tensor<32x40x40xf32>, tensor<32x40x1xf32>) -> tensor<32x40x40xf32> + %3338 = math.exp %3337 : tensor<32x40x40xf32> + %3339 = tosa.reshape %3336 {new_shape = array} : (tensor<32x40x1xf32>) -> tensor<1x32x40xf32> + %3340 = tosa.reshape %3303 {new_shape = array} : (tensor<1x32x40x128xf32>) -> tensor<32x40x128xf32> + %3341 = tosa.matmul %3338, %3340 : (tensor<32x40x40xf32>, tensor<32x40x128xf32>) -> tensor<32x40x128xf32> + %3342 = tosa.reshape %3341 {new_shape = array} : (tensor<32x40x128xf32>) -> tensor<1x32x40x128xf32> + %3343 = "tosa.const"() <{value = dense<[0, 2, 1, 3]> : tensor<4xi32>}> : () -> tensor<4xi32> + %3344 = tosa.transpose %3342, %3343 : (tensor<1x32x40x128xf32>, tensor<4xi32>) -> tensor<1x40x32x128xf32> + %3345 = tosa.reshape %3344 {new_shape = array} : (tensor<1x40x32x128xf32>) -> tensor<1x40x4096xf32> + %3346 = "tosa.const"() <{value = dense<[1, 0]> : tensor<2xi32>}> : () -> tensor<2xi32> + %3347 = tosa.transpose %arg259, %3346 : (tensor<4096x4096xf32>, tensor<2xi32>) -> tensor<4096x4096xf32> + %3348 = tosa.reshape %3345 {new_shape = array} : (tensor<1x40x4096xf32>) -> tensor<40x4096xf32> + %cst_782 = arith.constant dense<0.000000e+00> : tensor<40x4096xf32> + %3349 = linalg.matmul {cast = #linalg.type_fn} ins(%3348, %3347 : tensor<40x4096xf32>, tensor<4096x4096xf32>) outs(%cst_782 : tensor<40x4096xf32>) -> tensor<40x4096xf32> + %3350 = tosa.reshape %3349 {new_shape = array} : (tensor<40x4096xf32>) -> 
tensor<1x40x4096xf32> + %3351 = tosa.add %3267, %3350 : (tensor<1x40x4096xf32>, tensor<1x40x4096xf32>) -> tensor<1x40x4096xf32> + %3352 = tensor.empty() : tensor<1x40x4096xf32> + %c2_i32_783 = arith.constant 2 : i32 + %3353 = linalg.generic {indexing_maps = [#map4, #map4], iterator_types = ["parallel", "parallel", "parallel"]} ins(%3351 : tensor<1x40x4096xf32>) outs(%3352 : tensor<1x40x4096xf32>) { + ^bb0(%in: f32, %out: f32): + %3745 = math.fpowi %in, %c2_i32_783 : f32, i32 + linalg.yield %3745 : f32 + } -> tensor<1x40x4096xf32> + %3354 = tosa.reduce_sum %3353 {axis = 2 : i32} : (tensor<1x40x4096xf32>) -> tensor<1x40x1xf32> + %3355 = "tosa.const"() <{value = dense<4.096000e+03> : tensor<1xf32>}> : () -> tensor<1xf32> + %3356 = tosa.reciprocal %3355 : (tensor<1xf32>) -> tensor<1xf32> + %3357 = tosa.mul %3356, %3354 {shift = 0 : i8} : (tensor<1xf32>, tensor<1x40x1xf32>) -> tensor<1x40x1xf32> + %3358 = "tosa.const"() <{value = dense<9.99999974E-6> : tensor<1x40x1xf32>}> : () -> tensor<1x40x1xf32> + %3359 = tosa.add %3357, %3358 : (tensor<1x40x1xf32>, tensor<1x40x1xf32>) -> tensor<1x40x1xf32> + %3360 = tosa.rsqrt %3359 : (tensor<1x40x1xf32>) -> tensor<1x40x1xf32> + %3361 = tosa.mul %3351, %3360 {shift = 0 : i8} : (tensor<1x40x4096xf32>, tensor<1x40x1xf32>) -> tensor<1x40x4096xf32> + %3362 = tosa.reshape %arg260 {new_shape = array} : (tensor<4096xf32>) -> tensor<1x1x4096xf32> + %3363 = tosa.mul %3362, %3361 {shift = 0 : i8} : (tensor<1x1x4096xf32>, tensor<1x40x4096xf32>) -> tensor<1x40x4096xf32> + %3364 = "tosa.const"() <{value = dense<[1, 0]> : tensor<2xi32>}> : () -> tensor<2xi32> + %3365 = tosa.transpose %arg261, %3364 : (tensor<11008x4096xf32>, tensor<2xi32>) -> tensor<4096x11008xf32> + %3366 = tosa.reshape %3363 {new_shape = array} : (tensor<1x40x4096xf32>) -> tensor<40x4096xf32> + %cst_784 = arith.constant dense<0.000000e+00> : tensor<40x11008xf32> + %3367 = linalg.matmul {cast = #linalg.type_fn} ins(%3366, %3365 : tensor<40x4096xf32>, tensor<4096x11008xf32>) 
outs(%cst_784 : tensor<40x11008xf32>) -> tensor<40x11008xf32> + %3368 = tosa.reshape %3367 {new_shape = array} : (tensor<40x11008xf32>) -> tensor<1x40x11008xf32> + %3369 = tosa.sigmoid %3368 : (tensor<1x40x11008xf32>) -> tensor<1x40x11008xf32> + %3370 = tosa.mul %3368, %3369 {shift = 0 : i8} : (tensor<1x40x11008xf32>, tensor<1x40x11008xf32>) -> tensor<1x40x11008xf32> + %3371 = "tosa.const"() <{value = dense<[1, 0]> : tensor<2xi32>}> : () -> tensor<2xi32> + %3372 = tosa.transpose %arg262, %3371 : (tensor<11008x4096xf32>, tensor<2xi32>) -> tensor<4096x11008xf32> + %3373 = tosa.reshape %3363 {new_shape = array} : (tensor<1x40x4096xf32>) -> tensor<40x4096xf32> + %cst_785 = arith.constant dense<0.000000e+00> : tensor<40x11008xf32> + %3374 = linalg.matmul {cast = #linalg.type_fn} ins(%3373, %3372 : tensor<40x4096xf32>, tensor<4096x11008xf32>) outs(%cst_785 : tensor<40x11008xf32>) -> tensor<40x11008xf32> + %3375 = tosa.reshape %3374 {new_shape = array} : (tensor<40x11008xf32>) -> tensor<1x40x11008xf32> + %3376 = tosa.mul %3370, %3375 {shift = 0 : i8} : (tensor<1x40x11008xf32>, tensor<1x40x11008xf32>) -> tensor<1x40x11008xf32> + %3377 = "tosa.const"() <{value = dense<[1, 0]> : tensor<2xi32>}> : () -> tensor<2xi32> + %3378 = tosa.transpose %arg263, %3377 : (tensor<4096x11008xf32>, tensor<2xi32>) -> tensor<11008x4096xf32> + %3379 = tosa.reshape %3376 {new_shape = array} : (tensor<1x40x11008xf32>) -> tensor<40x11008xf32> + %cst_786 = arith.constant dense<0.000000e+00> : tensor<40x4096xf32> + %3380 = linalg.matmul {cast = #linalg.type_fn} ins(%3379, %3378 : tensor<40x11008xf32>, tensor<11008x4096xf32>) outs(%cst_786 : tensor<40x4096xf32>) -> tensor<40x4096xf32> + %3381 = tosa.reshape %3380 {new_shape = array} : (tensor<40x4096xf32>) -> tensor<1x40x4096xf32> + %3382 = tosa.add %3351, %3381 : (tensor<1x40x4096xf32>, tensor<1x40x4096xf32>) -> tensor<1x40x4096xf32> + %3383 = tensor.empty() : tensor<1x40x4096xf32> + %c2_i32_787 = arith.constant 2 : i32 + %3384 = linalg.generic 
{indexing_maps = [#map4, #map4], iterator_types = ["parallel", "parallel", "parallel"]} ins(%3382 : tensor<1x40x4096xf32>) outs(%3383 : tensor<1x40x4096xf32>) { + ^bb0(%in: f32, %out: f32): + %3745 = math.fpowi %in, %c2_i32_787 : f32, i32 + linalg.yield %3745 : f32 + } -> tensor<1x40x4096xf32> + %3385 = tosa.reduce_sum %3384 {axis = 2 : i32} : (tensor<1x40x4096xf32>) -> tensor<1x40x1xf32> + %3386 = "tosa.const"() <{value = dense<4.096000e+03> : tensor<1xf32>}> : () -> tensor<1xf32> + %3387 = tosa.reciprocal %3386 : (tensor<1xf32>) -> tensor<1xf32> + %3388 = tosa.mul %3387, %3385 {shift = 0 : i8} : (tensor<1xf32>, tensor<1x40x1xf32>) -> tensor<1x40x1xf32> + %3389 = "tosa.const"() <{value = dense<9.99999974E-6> : tensor<1x40x1xf32>}> : () -> tensor<1x40x1xf32> + %3390 = tosa.add %3388, %3389 : (tensor<1x40x1xf32>, tensor<1x40x1xf32>) -> tensor<1x40x1xf32> + %3391 = tosa.rsqrt %3390 : (tensor<1x40x1xf32>) -> tensor<1x40x1xf32> + %3392 = tosa.mul %3382, %3391 {shift = 0 : i8} : (tensor<1x40x4096xf32>, tensor<1x40x1xf32>) -> tensor<1x40x4096xf32> + %3393 = tosa.reshape %arg264 {new_shape = array} : (tensor<4096xf32>) -> tensor<1x1x4096xf32> + %3394 = tosa.mul %3393, %3392 {shift = 0 : i8} : (tensor<1x1x4096xf32>, tensor<1x40x4096xf32>) -> tensor<1x40x4096xf32> + %3395 = "tosa.const"() <{value = dense<[1, 0]> : tensor<2xi32>}> : () -> tensor<2xi32> + %3396 = tosa.transpose %arg265, %3395 : (tensor<4096x4096xf32>, tensor<2xi32>) -> tensor<4096x4096xf32> + %3397 = tosa.reshape %3394 {new_shape = array} : (tensor<1x40x4096xf32>) -> tensor<40x4096xf32> + %cst_788 = arith.constant dense<0.000000e+00> : tensor<40x4096xf32> + %3398 = linalg.matmul {cast = #linalg.type_fn} ins(%3397, %3396 : tensor<40x4096xf32>, tensor<4096x4096xf32>) outs(%cst_788 : tensor<40x4096xf32>) -> tensor<40x4096xf32> + %3399 = tosa.reshape %3398 {new_shape = array} : (tensor<40x4096xf32>) -> tensor<1x40x4096xf32> + %3400 = "tosa.const"() <{value = dense<[1, 0]> : tensor<2xi32>}> : () -> tensor<2xi32> + 
%3401 = tosa.transpose %arg266, %3400 : (tensor<4096x4096xf32>, tensor<2xi32>) -> tensor<4096x4096xf32> + %3402 = tosa.reshape %3394 {new_shape = array} : (tensor<1x40x4096xf32>) -> tensor<40x4096xf32> + %cst_789 = arith.constant dense<0.000000e+00> : tensor<40x4096xf32> + %3403 = linalg.matmul {cast = #linalg.type_fn} ins(%3402, %3401 : tensor<40x4096xf32>, tensor<4096x4096xf32>) outs(%cst_789 : tensor<40x4096xf32>) -> tensor<40x4096xf32> + %3404 = tosa.reshape %3403 {new_shape = array} : (tensor<40x4096xf32>) -> tensor<1x40x4096xf32> + %3405 = "tosa.const"() <{value = dense<[1, 0]> : tensor<2xi32>}> : () -> tensor<2xi32> + %3406 = tosa.transpose %arg267, %3405 : (tensor<4096x4096xf32>, tensor<2xi32>) -> tensor<4096x4096xf32> + %3407 = tosa.reshape %3394 {new_shape = array} : (tensor<1x40x4096xf32>) -> tensor<40x4096xf32> + %cst_790 = arith.constant dense<0.000000e+00> : tensor<40x4096xf32> + %3408 = linalg.matmul {cast = #linalg.type_fn} ins(%3407, %3406 : tensor<40x4096xf32>, tensor<4096x4096xf32>) outs(%cst_790 : tensor<40x4096xf32>) -> tensor<40x4096xf32> + %3409 = tosa.reshape %3408 {new_shape = array} : (tensor<40x4096xf32>) -> tensor<1x40x4096xf32> + %3410 = tosa.reshape %3399 {new_shape = array} : (tensor<1x40x4096xf32>) -> tensor<1x40x32x128xf32> + %3411 = "tosa.const"() <{value = dense<[0, 2, 1, 3]> : tensor<4xi32>}> : () -> tensor<4xi32> + %3412 = tosa.transpose %3410, %3411 : (tensor<1x40x32x128xf32>, tensor<4xi32>) -> tensor<1x32x40x128xf32> + %3413 = tosa.reshape %3404 {new_shape = array} : (tensor<1x40x4096xf32>) -> tensor<1x40x32x128xf32> + %3414 = "tosa.const"() <{value = dense<[0, 2, 1, 3]> : tensor<4xi32>}> : () -> tensor<4xi32> + %3415 = tosa.transpose %3413, %3414 : (tensor<1x40x32x128xf32>, tensor<4xi32>) -> tensor<1x32x40x128xf32> + %3416 = tosa.reshape %3409 {new_shape = array} : (tensor<1x40x4096xf32>) -> tensor<1x40x32x128xf32> + %3417 = "tosa.const"() <{value = dense<[0, 2, 1, 3]> : tensor<4xi32>}> : () -> tensor<4xi32> + %3418 = 
tosa.transpose %3416, %3417 : (tensor<1x40x32x128xf32>, tensor<4xi32>) -> tensor<1x32x40x128xf32> + %3419 = tosa.reshape %45 {new_shape = array} : (tensor<1x40x128xf32>) -> tensor<1x1x40x128xf32> + %3420 = tosa.reshape %47 {new_shape = array} : (tensor<1x40x128xf32>) -> tensor<1x1x40x128xf32> + %3421 = tosa.mul %3412, %3419 {shift = 0 : i8} : (tensor<1x32x40x128xf32>, tensor<1x1x40x128xf32>) -> tensor<1x32x40x128xf32> + %extracted_slice_791 = tensor.extract_slice %3412[0, 0, 0, 0] [1, 32, 40, 64] [1, 1, 1, 1] : tensor<1x32x40x128xf32> to tensor<1x32x40x64xf32> + %extracted_slice_792 = tensor.extract_slice %3412[0, 0, 0, 64] [1, 32, 40, 64] [1, 1, 1, 1] : tensor<1x32x40x128xf32> to tensor<1x32x40x64xf32> + %3422 = tensor.empty() : tensor<1x32x40x64xf32> + %3423 = linalg.negf ins(%extracted_slice_792 : tensor<1x32x40x64xf32>) outs(%3422 : tensor<1x32x40x64xf32>) -> tensor<1x32x40x64xf32> + %3424 = tensor.empty() : tensor<1x32x40x128xf32> + %inserted_slice_793 = tensor.insert_slice %3423 into %3424[0, 0, 0, 0] [1, 32, 40, 64] [1, 1, 1, 1] : tensor<1x32x40x64xf32> into tensor<1x32x40x128xf32> + %inserted_slice_794 = tensor.insert_slice %extracted_slice_791 into %inserted_slice_793[0, 0, 0, 64] [1, 32, 40, 64] [1, 1, 1, 1] : tensor<1x32x40x64xf32> into tensor<1x32x40x128xf32> + %3425 = tosa.mul %inserted_slice_794, %3420 {shift = 0 : i8} : (tensor<1x32x40x128xf32>, tensor<1x1x40x128xf32>) -> tensor<1x32x40x128xf32> + %3426 = tosa.add %3421, %3425 : (tensor<1x32x40x128xf32>, tensor<1x32x40x128xf32>) -> tensor<1x32x40x128xf32> + %3427 = tosa.mul %3415, %3419 {shift = 0 : i8} : (tensor<1x32x40x128xf32>, tensor<1x1x40x128xf32>) -> tensor<1x32x40x128xf32> + %extracted_slice_795 = tensor.extract_slice %3415[0, 0, 0, 0] [1, 32, 40, 64] [1, 1, 1, 1] : tensor<1x32x40x128xf32> to tensor<1x32x40x64xf32> + %extracted_slice_796 = tensor.extract_slice %3415[0, 0, 0, 64] [1, 32, 40, 64] [1, 1, 1, 1] : tensor<1x32x40x128xf32> to tensor<1x32x40x64xf32> + %3428 = tensor.empty() : 
tensor<1x32x40x64xf32> + %3429 = linalg.negf ins(%extracted_slice_796 : tensor<1x32x40x64xf32>) outs(%3428 : tensor<1x32x40x64xf32>) -> tensor<1x32x40x64xf32> + %3430 = tensor.empty() : tensor<1x32x40x128xf32> + %inserted_slice_797 = tensor.insert_slice %3429 into %3430[0, 0, 0, 0] [1, 32, 40, 64] [1, 1, 1, 1] : tensor<1x32x40x64xf32> into tensor<1x32x40x128xf32> + %inserted_slice_798 = tensor.insert_slice %extracted_slice_795 into %inserted_slice_797[0, 0, 0, 64] [1, 32, 40, 64] [1, 1, 1, 1] : tensor<1x32x40x64xf32> into tensor<1x32x40x128xf32> + %3431 = tosa.mul %inserted_slice_798, %3420 {shift = 0 : i8} : (tensor<1x32x40x128xf32>, tensor<1x1x40x128xf32>) -> tensor<1x32x40x128xf32> + %3432 = tosa.add %3427, %3431 : (tensor<1x32x40x128xf32>, tensor<1x32x40x128xf32>) -> tensor<1x32x40x128xf32> + %3433 = tosa.reshape %19 {new_shape = array} : (tensor<40x41xf32>) -> tensor<1x40x41xf32> + %3434 = tosa.reshape %3433 {new_shape = array} : (tensor<1x40x41xf32>) -> tensor<1x1x40x41xf32> + %extracted_slice_799 = tensor.extract_slice %3434[0, 0, 0, 0] [1, 1, 40, 41] [1, 1, 1, 1] : tensor<1x1x40x41xf32> to tensor<1x1x40x41xf32> + %extracted_slice_800 = tensor.extract_slice %extracted_slice_799[0, 0, 0, 0] [1, 1, 40, 41] [1, 1, 1, 1] : tensor<1x1x40x41xf32> to tensor<1x1x40x41xf32> + %3435 = "tosa.const"() <{value = dense<0.000000e+00> : tensor<1x1x40x41xf32>}> : () -> tensor<1x1x40x41xf32> + %3436 = tosa.add %extracted_slice_800, %3435 : (tensor<1x1x40x41xf32>, tensor<1x1x40x41xf32>) -> tensor<1x1x40x41xf32> + %extracted_slice_801 = tensor.extract_slice %3436[0, 0, 0, 0] [1, 1, 40, 41] [1, 1, 1, 1] : tensor<1x1x40x41xf32> to tensor<1x1x40x41xf32> + %extracted_slice_802 = tensor.extract_slice %extracted_slice_801[0, 0, 0, 0] [1, 1, 40, 41] [1, 1, 1, 1] : tensor<1x1x40x41xf32> to tensor<1x1x40x41xf32> + %extracted_slice_803 = tensor.extract_slice %extracted_slice_802[0, 0, 0, 0] [1, 1, 40, 41] [1, 1, 1, 1] : tensor<1x1x40x41xf32> to tensor<1x1x40x41xf32> + 
%extracted_slice_804 = tensor.extract_slice %extracted_slice_803[0, 0, 0, 0] [1, 1, 40, 40] [1, 1, 1, 1] : tensor<1x1x40x41xf32> to tensor<1x1x40x40xf32> + %cst_805 = arith.constant 0.000000e+00 : f32 + %splat_806 = tensor.splat %cst_805 : tensor<40x40xf32> + %3437 = tosa.reshape %extracted_slice_804 {new_shape = array} : (tensor<1x1x40x40xf32>) -> tensor<40x40xf32> + %3438 = tosa.add %splat_806, %3437 : (tensor<40x40xf32>, tensor<40x40xf32>) -> tensor<40x40xf32> + %3439 = "tosa.const"() <{value = dense<[0, 1, 3, 2]> : tensor<4xi32>}> : () -> tensor<4xi32> + %3440 = tosa.transpose %3432, %3439 : (tensor<1x32x40x128xf32>, tensor<4xi32>) -> tensor<1x32x128x40xf32> + %3441 = tosa.reshape %3426 {new_shape = array} : (tensor<1x32x40x128xf32>) -> tensor<32x40x128xf32> + %3442 = tosa.reshape %3440 {new_shape = array} : (tensor<1x32x128x40xf32>) -> tensor<32x128x40xf32> + %3443 = tosa.matmul %3441, %3442 : (tensor<32x40x128xf32>, tensor<32x128x40xf32>) -> tensor<32x40x40xf32> + %cst_807 = arith.constant 0.0883883461 : f32 + %splat_808 = tensor.splat %cst_807 : tensor<32x40x40xf32> + %3444 = tosa.mul %3443, %splat_808 {shift = 0 : i8} : (tensor<32x40x40xf32>, tensor<32x40x40xf32>) -> tensor<32x40x40xf32> + %3445 = tosa.add %3444, %3438 : (tensor<32x40x40xf32>, tensor<40x40xf32>) -> tensor<32x40x40xf32> + %3446 = tosa.reduce_max %3445 {axis = 2 : i32} : (tensor<32x40x40xf32>) -> tensor<32x40x1xf32> + %3447 = tosa.sub %3445, %3446 : (tensor<32x40x40xf32>, tensor<32x40x1xf32>) -> tensor<32x40x40xf32> + %3448 = math.exp %3447 : tensor<32x40x40xf32> + %3449 = tosa.reduce_sum %3448 {axis = 2 : i32} : (tensor<32x40x40xf32>) -> tensor<32x40x1xf32> + %3450 = tosa.log %3449 : (tensor<32x40x1xf32>) -> tensor<32x40x1xf32> + %3451 = tosa.add %3446, %3450 : (tensor<32x40x1xf32>, tensor<32x40x1xf32>) -> tensor<32x40x1xf32> + %3452 = tosa.sub %3445, %3451 : (tensor<32x40x40xf32>, tensor<32x40x1xf32>) -> tensor<32x40x40xf32> + %3453 = math.exp %3452 : tensor<32x40x40xf32> + %3454 = 
tosa.reshape %3451 {new_shape = array} : (tensor<32x40x1xf32>) -> tensor<1x32x40xf32> + %3455 = tosa.reshape %3418 {new_shape = array} : (tensor<1x32x40x128xf32>) -> tensor<32x40x128xf32> + %3456 = tosa.matmul %3453, %3455 : (tensor<32x40x40xf32>, tensor<32x40x128xf32>) -> tensor<32x40x128xf32> + %3457 = tosa.reshape %3456 {new_shape = array} : (tensor<32x40x128xf32>) -> tensor<1x32x40x128xf32> + %3458 = "tosa.const"() <{value = dense<[0, 2, 1, 3]> : tensor<4xi32>}> : () -> tensor<4xi32> + %3459 = tosa.transpose %3457, %3458 : (tensor<1x32x40x128xf32>, tensor<4xi32>) -> tensor<1x40x32x128xf32> + %3460 = tosa.reshape %3459 {new_shape = array} : (tensor<1x40x32x128xf32>) -> tensor<1x40x4096xf32> + %3461 = "tosa.const"() <{value = dense<[1, 0]> : tensor<2xi32>}> : () -> tensor<2xi32> + %3462 = tosa.transpose %arg268, %3461 : (tensor<4096x4096xf32>, tensor<2xi32>) -> tensor<4096x4096xf32> + %3463 = tosa.reshape %3460 {new_shape = array} : (tensor<1x40x4096xf32>) -> tensor<40x4096xf32> + %cst_809 = arith.constant dense<0.000000e+00> : tensor<40x4096xf32> + %3464 = linalg.matmul {cast = #linalg.type_fn} ins(%3463, %3462 : tensor<40x4096xf32>, tensor<4096x4096xf32>) outs(%cst_809 : tensor<40x4096xf32>) -> tensor<40x4096xf32> + %3465 = tosa.reshape %3464 {new_shape = array} : (tensor<40x4096xf32>) -> tensor<1x40x4096xf32> + %3466 = tosa.add %3382, %3465 : (tensor<1x40x4096xf32>, tensor<1x40x4096xf32>) -> tensor<1x40x4096xf32> + %3467 = tensor.empty() : tensor<1x40x4096xf32> + %c2_i32_810 = arith.constant 2 : i32 + %3468 = linalg.generic {indexing_maps = [#map4, #map4], iterator_types = ["parallel", "parallel", "parallel"]} ins(%3466 : tensor<1x40x4096xf32>) outs(%3467 : tensor<1x40x4096xf32>) { + ^bb0(%in: f32, %out: f32): + %3745 = math.fpowi %in, %c2_i32_810 : f32, i32 + linalg.yield %3745 : f32 + } -> tensor<1x40x4096xf32> + %3469 = tosa.reduce_sum %3468 {axis = 2 : i32} : (tensor<1x40x4096xf32>) -> tensor<1x40x1xf32> + %3470 = "tosa.const"() <{value = 
dense<4.096000e+03> : tensor<1xf32>}> : () -> tensor<1xf32> + %3471 = tosa.reciprocal %3470 : (tensor<1xf32>) -> tensor<1xf32> + %3472 = tosa.mul %3471, %3469 {shift = 0 : i8} : (tensor<1xf32>, tensor<1x40x1xf32>) -> tensor<1x40x1xf32> + %3473 = "tosa.const"() <{value = dense<9.99999974E-6> : tensor<1x40x1xf32>}> : () -> tensor<1x40x1xf32> + %3474 = tosa.add %3472, %3473 : (tensor<1x40x1xf32>, tensor<1x40x1xf32>) -> tensor<1x40x1xf32> + %3475 = tosa.rsqrt %3474 : (tensor<1x40x1xf32>) -> tensor<1x40x1xf32> + %3476 = tosa.mul %3466, %3475 {shift = 0 : i8} : (tensor<1x40x4096xf32>, tensor<1x40x1xf32>) -> tensor<1x40x4096xf32> + %3477 = tosa.reshape %arg269 {new_shape = array} : (tensor<4096xf32>) -> tensor<1x1x4096xf32> + %3478 = tosa.mul %3477, %3476 {shift = 0 : i8} : (tensor<1x1x4096xf32>, tensor<1x40x4096xf32>) -> tensor<1x40x4096xf32> + %3479 = "tosa.const"() <{value = dense<[1, 0]> : tensor<2xi32>}> : () -> tensor<2xi32> + %3480 = tosa.transpose %arg270, %3479 : (tensor<11008x4096xf32>, tensor<2xi32>) -> tensor<4096x11008xf32> + %3481 = tosa.reshape %3478 {new_shape = array} : (tensor<1x40x4096xf32>) -> tensor<40x4096xf32> + %cst_811 = arith.constant dense<0.000000e+00> : tensor<40x11008xf32> + %3482 = linalg.matmul {cast = #linalg.type_fn} ins(%3481, %3480 : tensor<40x4096xf32>, tensor<4096x11008xf32>) outs(%cst_811 : tensor<40x11008xf32>) -> tensor<40x11008xf32> + %3483 = tosa.reshape %3482 {new_shape = array} : (tensor<40x11008xf32>) -> tensor<1x40x11008xf32> + %3484 = tosa.sigmoid %3483 : (tensor<1x40x11008xf32>) -> tensor<1x40x11008xf32> + %3485 = tosa.mul %3483, %3484 {shift = 0 : i8} : (tensor<1x40x11008xf32>, tensor<1x40x11008xf32>) -> tensor<1x40x11008xf32> + %3486 = "tosa.const"() <{value = dense<[1, 0]> : tensor<2xi32>}> : () -> tensor<2xi32> + %3487 = tosa.transpose %arg271, %3486 : (tensor<11008x4096xf32>, tensor<2xi32>) -> tensor<4096x11008xf32> + %3488 = tosa.reshape %3478 {new_shape = array} : (tensor<1x40x4096xf32>) -> tensor<40x4096xf32> + 
%cst_812 = arith.constant dense<0.000000e+00> : tensor<40x11008xf32> + %3489 = linalg.matmul {cast = #linalg.type_fn} ins(%3488, %3487 : tensor<40x4096xf32>, tensor<4096x11008xf32>) outs(%cst_812 : tensor<40x11008xf32>) -> tensor<40x11008xf32> + %3490 = tosa.reshape %3489 {new_shape = array} : (tensor<40x11008xf32>) -> tensor<1x40x11008xf32> + %3491 = tosa.mul %3485, %3490 {shift = 0 : i8} : (tensor<1x40x11008xf32>, tensor<1x40x11008xf32>) -> tensor<1x40x11008xf32> + %3492 = "tosa.const"() <{value = dense<[1, 0]> : tensor<2xi32>}> : () -> tensor<2xi32> + %3493 = tosa.transpose %arg272, %3492 : (tensor<4096x11008xf32>, tensor<2xi32>) -> tensor<11008x4096xf32> + %3494 = tosa.reshape %3491 {new_shape = array} : (tensor<1x40x11008xf32>) -> tensor<40x11008xf32> + %cst_813 = arith.constant dense<0.000000e+00> : tensor<40x4096xf32> + %3495 = linalg.matmul {cast = #linalg.type_fn} ins(%3494, %3493 : tensor<40x11008xf32>, tensor<11008x4096xf32>) outs(%cst_813 : tensor<40x4096xf32>) -> tensor<40x4096xf32> + %3496 = tosa.reshape %3495 {new_shape = array} : (tensor<40x4096xf32>) -> tensor<1x40x4096xf32> + %3497 = tosa.add %3466, %3496 : (tensor<1x40x4096xf32>, tensor<1x40x4096xf32>) -> tensor<1x40x4096xf32> + %3498 = tensor.empty() : tensor<1x40x4096xf32> + %c2_i32_814 = arith.constant 2 : i32 + %3499 = linalg.generic {indexing_maps = [#map4, #map4], iterator_types = ["parallel", "parallel", "parallel"]} ins(%3497 : tensor<1x40x4096xf32>) outs(%3498 : tensor<1x40x4096xf32>) { + ^bb0(%in: f32, %out: f32): + %3745 = math.fpowi %in, %c2_i32_814 : f32, i32 + linalg.yield %3745 : f32 + } -> tensor<1x40x4096xf32> + %3500 = tosa.reduce_sum %3499 {axis = 2 : i32} : (tensor<1x40x4096xf32>) -> tensor<1x40x1xf32> + %3501 = "tosa.const"() <{value = dense<4.096000e+03> : tensor<1xf32>}> : () -> tensor<1xf32> + %3502 = tosa.reciprocal %3501 : (tensor<1xf32>) -> tensor<1xf32> + %3503 = tosa.mul %3502, %3500 {shift = 0 : i8} : (tensor<1xf32>, tensor<1x40x1xf32>) -> tensor<1x40x1xf32> + %3504 
= "tosa.const"() <{value = dense<9.99999974E-6> : tensor<1x40x1xf32>}> : () -> tensor<1x40x1xf32> + %3505 = tosa.add %3503, %3504 : (tensor<1x40x1xf32>, tensor<1x40x1xf32>) -> tensor<1x40x1xf32> + %3506 = tosa.rsqrt %3505 : (tensor<1x40x1xf32>) -> tensor<1x40x1xf32> + %3507 = tosa.mul %3497, %3506 {shift = 0 : i8} : (tensor<1x40x4096xf32>, tensor<1x40x1xf32>) -> tensor<1x40x4096xf32> + %3508 = tosa.reshape %arg273 {new_shape = array} : (tensor<4096xf32>) -> tensor<1x1x4096xf32> + %3509 = tosa.mul %3508, %3507 {shift = 0 : i8} : (tensor<1x1x4096xf32>, tensor<1x40x4096xf32>) -> tensor<1x40x4096xf32> + %3510 = "tosa.const"() <{value = dense<[1, 0]> : tensor<2xi32>}> : () -> tensor<2xi32> + %3511 = tosa.transpose %arg274, %3510 : (tensor<4096x4096xf32>, tensor<2xi32>) -> tensor<4096x4096xf32> + %3512 = tosa.reshape %3509 {new_shape = array} : (tensor<1x40x4096xf32>) -> tensor<40x4096xf32> + %cst_815 = arith.constant dense<0.000000e+00> : tensor<40x4096xf32> + %3513 = linalg.matmul {cast = #linalg.type_fn} ins(%3512, %3511 : tensor<40x4096xf32>, tensor<4096x4096xf32>) outs(%cst_815 : tensor<40x4096xf32>) -> tensor<40x4096xf32> + %3514 = tosa.reshape %3513 {new_shape = array} : (tensor<40x4096xf32>) -> tensor<1x40x4096xf32> + %3515 = "tosa.const"() <{value = dense<[1, 0]> : tensor<2xi32>}> : () -> tensor<2xi32> + %3516 = tosa.transpose %arg275, %3515 : (tensor<4096x4096xf32>, tensor<2xi32>) -> tensor<4096x4096xf32> + %3517 = tosa.reshape %3509 {new_shape = array} : (tensor<1x40x4096xf32>) -> tensor<40x4096xf32> + %cst_816 = arith.constant dense<0.000000e+00> : tensor<40x4096xf32> + %3518 = linalg.matmul {cast = #linalg.type_fn} ins(%3517, %3516 : tensor<40x4096xf32>, tensor<4096x4096xf32>) outs(%cst_816 : tensor<40x4096xf32>) -> tensor<40x4096xf32> + %3519 = tosa.reshape %3518 {new_shape = array} : (tensor<40x4096xf32>) -> tensor<1x40x4096xf32> + %3520 = "tosa.const"() <{value = dense<[1, 0]> : tensor<2xi32>}> : () -> tensor<2xi32> + %3521 = tosa.transpose %arg276, %3520 
: (tensor<4096x4096xf32>, tensor<2xi32>) -> tensor<4096x4096xf32> + %3522 = tosa.reshape %3509 {new_shape = array} : (tensor<1x40x4096xf32>) -> tensor<40x4096xf32> + %cst_817 = arith.constant dense<0.000000e+00> : tensor<40x4096xf32> + %3523 = linalg.matmul {cast = #linalg.type_fn} ins(%3522, %3521 : tensor<40x4096xf32>, tensor<4096x4096xf32>) outs(%cst_817 : tensor<40x4096xf32>) -> tensor<40x4096xf32> + %3524 = tosa.reshape %3523 {new_shape = array} : (tensor<40x4096xf32>) -> tensor<1x40x4096xf32> + %3525 = tosa.reshape %3514 {new_shape = array} : (tensor<1x40x4096xf32>) -> tensor<1x40x32x128xf32> + %3526 = "tosa.const"() <{value = dense<[0, 2, 1, 3]> : tensor<4xi32>}> : () -> tensor<4xi32> + %3527 = tosa.transpose %3525, %3526 : (tensor<1x40x32x128xf32>, tensor<4xi32>) -> tensor<1x32x40x128xf32> + %3528 = tosa.reshape %3519 {new_shape = array} : (tensor<1x40x4096xf32>) -> tensor<1x40x32x128xf32> + %3529 = "tosa.const"() <{value = dense<[0, 2, 1, 3]> : tensor<4xi32>}> : () -> tensor<4xi32> + %3530 = tosa.transpose %3528, %3529 : (tensor<1x40x32x128xf32>, tensor<4xi32>) -> tensor<1x32x40x128xf32> + %3531 = tosa.reshape %3524 {new_shape = array} : (tensor<1x40x4096xf32>) -> tensor<1x40x32x128xf32> + %3532 = "tosa.const"() <{value = dense<[0, 2, 1, 3]> : tensor<4xi32>}> : () -> tensor<4xi32> + %3533 = tosa.transpose %3531, %3532 : (tensor<1x40x32x128xf32>, tensor<4xi32>) -> tensor<1x32x40x128xf32> + %3534 = tosa.reshape %45 {new_shape = array} : (tensor<1x40x128xf32>) -> tensor<1x1x40x128xf32> + %3535 = tosa.reshape %47 {new_shape = array} : (tensor<1x40x128xf32>) -> tensor<1x1x40x128xf32> + %3536 = tosa.mul %3527, %3534 {shift = 0 : i8} : (tensor<1x32x40x128xf32>, tensor<1x1x40x128xf32>) -> tensor<1x32x40x128xf32> + %extracted_slice_818 = tensor.extract_slice %3527[0, 0, 0, 0] [1, 32, 40, 64] [1, 1, 1, 1] : tensor<1x32x40x128xf32> to tensor<1x32x40x64xf32> + %extracted_slice_819 = tensor.extract_slice %3527[0, 0, 0, 64] [1, 32, 40, 64] [1, 1, 1, 1] : 
tensor<1x32x40x128xf32> to tensor<1x32x40x64xf32> + %3537 = tensor.empty() : tensor<1x32x40x64xf32> + %3538 = linalg.negf ins(%extracted_slice_819 : tensor<1x32x40x64xf32>) outs(%3537 : tensor<1x32x40x64xf32>) -> tensor<1x32x40x64xf32> + %3539 = tensor.empty() : tensor<1x32x40x128xf32> + %inserted_slice_820 = tensor.insert_slice %3538 into %3539[0, 0, 0, 0] [1, 32, 40, 64] [1, 1, 1, 1] : tensor<1x32x40x64xf32> into tensor<1x32x40x128xf32> + %inserted_slice_821 = tensor.insert_slice %extracted_slice_818 into %inserted_slice_820[0, 0, 0, 64] [1, 32, 40, 64] [1, 1, 1, 1] : tensor<1x32x40x64xf32> into tensor<1x32x40x128xf32> + %3540 = tosa.mul %inserted_slice_821, %3535 {shift = 0 : i8} : (tensor<1x32x40x128xf32>, tensor<1x1x40x128xf32>) -> tensor<1x32x40x128xf32> + %3541 = tosa.add %3536, %3540 : (tensor<1x32x40x128xf32>, tensor<1x32x40x128xf32>) -> tensor<1x32x40x128xf32> + %3542 = tosa.mul %3530, %3534 {shift = 0 : i8} : (tensor<1x32x40x128xf32>, tensor<1x1x40x128xf32>) -> tensor<1x32x40x128xf32> + %extracted_slice_822 = tensor.extract_slice %3530[0, 0, 0, 0] [1, 32, 40, 64] [1, 1, 1, 1] : tensor<1x32x40x128xf32> to tensor<1x32x40x64xf32> + %extracted_slice_823 = tensor.extract_slice %3530[0, 0, 0, 64] [1, 32, 40, 64] [1, 1, 1, 1] : tensor<1x32x40x128xf32> to tensor<1x32x40x64xf32> + %3543 = tensor.empty() : tensor<1x32x40x64xf32> + %3544 = linalg.negf ins(%extracted_slice_823 : tensor<1x32x40x64xf32>) outs(%3543 : tensor<1x32x40x64xf32>) -> tensor<1x32x40x64xf32> + %3545 = tensor.empty() : tensor<1x32x40x128xf32> + %inserted_slice_824 = tensor.insert_slice %3544 into %3545[0, 0, 0, 0] [1, 32, 40, 64] [1, 1, 1, 1] : tensor<1x32x40x64xf32> into tensor<1x32x40x128xf32> + %inserted_slice_825 = tensor.insert_slice %extracted_slice_822 into %inserted_slice_824[0, 0, 0, 64] [1, 32, 40, 64] [1, 1, 1, 1] : tensor<1x32x40x64xf32> into tensor<1x32x40x128xf32> + %3546 = tosa.mul %inserted_slice_825, %3535 {shift = 0 : i8} : (tensor<1x32x40x128xf32>, tensor<1x1x40x128xf32>) -> 
tensor<1x32x40x128xf32> + %3547 = tosa.add %3542, %3546 : (tensor<1x32x40x128xf32>, tensor<1x32x40x128xf32>) -> tensor<1x32x40x128xf32> + %3548 = tosa.reshape %19 {new_shape = array} : (tensor<40x41xf32>) -> tensor<1x40x41xf32> + %3549 = tosa.reshape %3548 {new_shape = array} : (tensor<1x40x41xf32>) -> tensor<1x1x40x41xf32> + %extracted_slice_826 = tensor.extract_slice %3549[0, 0, 0, 0] [1, 1, 40, 41] [1, 1, 1, 1] : tensor<1x1x40x41xf32> to tensor<1x1x40x41xf32> + %extracted_slice_827 = tensor.extract_slice %extracted_slice_826[0, 0, 0, 0] [1, 1, 40, 41] [1, 1, 1, 1] : tensor<1x1x40x41xf32> to tensor<1x1x40x41xf32> + %3550 = "tosa.const"() <{value = dense<0.000000e+00> : tensor<1x1x40x41xf32>}> : () -> tensor<1x1x40x41xf32> + %3551 = tosa.add %extracted_slice_827, %3550 : (tensor<1x1x40x41xf32>, tensor<1x1x40x41xf32>) -> tensor<1x1x40x41xf32> + %extracted_slice_828 = tensor.extract_slice %3551[0, 0, 0, 0] [1, 1, 40, 41] [1, 1, 1, 1] : tensor<1x1x40x41xf32> to tensor<1x1x40x41xf32> + %extracted_slice_829 = tensor.extract_slice %extracted_slice_828[0, 0, 0, 0] [1, 1, 40, 41] [1, 1, 1, 1] : tensor<1x1x40x41xf32> to tensor<1x1x40x41xf32> + %extracted_slice_830 = tensor.extract_slice %extracted_slice_829[0, 0, 0, 0] [1, 1, 40, 41] [1, 1, 1, 1] : tensor<1x1x40x41xf32> to tensor<1x1x40x41xf32> + %extracted_slice_831 = tensor.extract_slice %extracted_slice_830[0, 0, 0, 0] [1, 1, 40, 40] [1, 1, 1, 1] : tensor<1x1x40x41xf32> to tensor<1x1x40x40xf32> + %cst_832 = arith.constant 0.000000e+00 : f32 + %splat_833 = tensor.splat %cst_832 : tensor<40x40xf32> + %3552 = tosa.reshape %extracted_slice_831 {new_shape = array} : (tensor<1x1x40x40xf32>) -> tensor<40x40xf32> + %3553 = tosa.add %splat_833, %3552 : (tensor<40x40xf32>, tensor<40x40xf32>) -> tensor<40x40xf32> + %3554 = "tosa.const"() <{value = dense<[0, 1, 3, 2]> : tensor<4xi32>}> : () -> tensor<4xi32> + %3555 = tosa.transpose %3547, %3554 : (tensor<1x32x40x128xf32>, tensor<4xi32>) -> tensor<1x32x128x40xf32> + %3556 = 
tosa.reshape %3541 {new_shape = array} : (tensor<1x32x40x128xf32>) -> tensor<32x40x128xf32> + %3557 = tosa.reshape %3555 {new_shape = array} : (tensor<1x32x128x40xf32>) -> tensor<32x128x40xf32> + %3558 = tosa.matmul %3556, %3557 : (tensor<32x40x128xf32>, tensor<32x128x40xf32>) -> tensor<32x40x40xf32> + %cst_834 = arith.constant 0.0883883461 : f32 + %splat_835 = tensor.splat %cst_834 : tensor<32x40x40xf32> + %3559 = tosa.mul %3558, %splat_835 {shift = 0 : i8} : (tensor<32x40x40xf32>, tensor<32x40x40xf32>) -> tensor<32x40x40xf32> + %3560 = tosa.add %3559, %3553 : (tensor<32x40x40xf32>, tensor<40x40xf32>) -> tensor<32x40x40xf32> + %3561 = tosa.reduce_max %3560 {axis = 2 : i32} : (tensor<32x40x40xf32>) -> tensor<32x40x1xf32> + %3562 = tosa.sub %3560, %3561 : (tensor<32x40x40xf32>, tensor<32x40x1xf32>) -> tensor<32x40x40xf32> + %3563 = math.exp %3562 : tensor<32x40x40xf32> + %3564 = tosa.reduce_sum %3563 {axis = 2 : i32} : (tensor<32x40x40xf32>) -> tensor<32x40x1xf32> + %3565 = tosa.log %3564 : (tensor<32x40x1xf32>) -> tensor<32x40x1xf32> + %3566 = tosa.add %3561, %3565 : (tensor<32x40x1xf32>, tensor<32x40x1xf32>) -> tensor<32x40x1xf32> + %3567 = tosa.sub %3560, %3566 : (tensor<32x40x40xf32>, tensor<32x40x1xf32>) -> tensor<32x40x40xf32> + %3568 = math.exp %3567 : tensor<32x40x40xf32> + %3569 = tosa.reshape %3566 {new_shape = array} : (tensor<32x40x1xf32>) -> tensor<1x32x40xf32> + %3570 = tosa.reshape %3533 {new_shape = array} : (tensor<1x32x40x128xf32>) -> tensor<32x40x128xf32> + %3571 = tosa.matmul %3568, %3570 : (tensor<32x40x40xf32>, tensor<32x40x128xf32>) -> tensor<32x40x128xf32> + %3572 = tosa.reshape %3571 {new_shape = array} : (tensor<32x40x128xf32>) -> tensor<1x32x40x128xf32> + %3573 = "tosa.const"() <{value = dense<[0, 2, 1, 3]> : tensor<4xi32>}> : () -> tensor<4xi32> + %3574 = tosa.transpose %3572, %3573 : (tensor<1x32x40x128xf32>, tensor<4xi32>) -> tensor<1x40x32x128xf32> + %3575 = tosa.reshape %3574 {new_shape = array} : (tensor<1x40x32x128xf32>) -> 
tensor<1x40x4096xf32> + %3576 = "tosa.const"() <{value = dense<[1, 0]> : tensor<2xi32>}> : () -> tensor<2xi32> + %3577 = tosa.transpose %arg277, %3576 : (tensor<4096x4096xf32>, tensor<2xi32>) -> tensor<4096x4096xf32> + %3578 = tosa.reshape %3575 {new_shape = array} : (tensor<1x40x4096xf32>) -> tensor<40x4096xf32> + %cst_836 = arith.constant dense<0.000000e+00> : tensor<40x4096xf32> + %3579 = linalg.matmul {cast = #linalg.type_fn} ins(%3578, %3577 : tensor<40x4096xf32>, tensor<4096x4096xf32>) outs(%cst_836 : tensor<40x4096xf32>) -> tensor<40x4096xf32> + %3580 = tosa.reshape %3579 {new_shape = array} : (tensor<40x4096xf32>) -> tensor<1x40x4096xf32> + %3581 = tosa.add %3497, %3580 : (tensor<1x40x4096xf32>, tensor<1x40x4096xf32>) -> tensor<1x40x4096xf32> + %3582 = tensor.empty() : tensor<1x40x4096xf32> + %c2_i32_837 = arith.constant 2 : i32 + %3583 = linalg.generic {indexing_maps = [#map4, #map4], iterator_types = ["parallel", "parallel", "parallel"]} ins(%3581 : tensor<1x40x4096xf32>) outs(%3582 : tensor<1x40x4096xf32>) { + ^bb0(%in: f32, %out: f32): + %3745 = math.fpowi %in, %c2_i32_837 : f32, i32 + linalg.yield %3745 : f32 + } -> tensor<1x40x4096xf32> + %3584 = tosa.reduce_sum %3583 {axis = 2 : i32} : (tensor<1x40x4096xf32>) -> tensor<1x40x1xf32> + %3585 = "tosa.const"() <{value = dense<4.096000e+03> : tensor<1xf32>}> : () -> tensor<1xf32> + %3586 = tosa.reciprocal %3585 : (tensor<1xf32>) -> tensor<1xf32> + %3587 = tosa.mul %3586, %3584 {shift = 0 : i8} : (tensor<1xf32>, tensor<1x40x1xf32>) -> tensor<1x40x1xf32> + %3588 = "tosa.const"() <{value = dense<9.99999974E-6> : tensor<1x40x1xf32>}> : () -> tensor<1x40x1xf32> + %3589 = tosa.add %3587, %3588 : (tensor<1x40x1xf32>, tensor<1x40x1xf32>) -> tensor<1x40x1xf32> + %3590 = tosa.rsqrt %3589 : (tensor<1x40x1xf32>) -> tensor<1x40x1xf32> + %3591 = tosa.mul %3581, %3590 {shift = 0 : i8} : (tensor<1x40x4096xf32>, tensor<1x40x1xf32>) -> tensor<1x40x4096xf32> + %3592 = tosa.reshape %arg278 {new_shape = array} : 
(tensor<4096xf32>) -> tensor<1x1x4096xf32> + %3593 = tosa.mul %3592, %3591 {shift = 0 : i8} : (tensor<1x1x4096xf32>, tensor<1x40x4096xf32>) -> tensor<1x40x4096xf32> + %3594 = "tosa.const"() <{value = dense<[1, 0]> : tensor<2xi32>}> : () -> tensor<2xi32> + %3595 = tosa.transpose %arg279, %3594 : (tensor<11008x4096xf32>, tensor<2xi32>) -> tensor<4096x11008xf32> + %3596 = tosa.reshape %3593 {new_shape = array} : (tensor<1x40x4096xf32>) -> tensor<40x4096xf32> + %cst_838 = arith.constant dense<0.000000e+00> : tensor<40x11008xf32> + %3597 = linalg.matmul {cast = #linalg.type_fn} ins(%3596, %3595 : tensor<40x4096xf32>, tensor<4096x11008xf32>) outs(%cst_838 : tensor<40x11008xf32>) -> tensor<40x11008xf32> + %3598 = tosa.reshape %3597 {new_shape = array} : (tensor<40x11008xf32>) -> tensor<1x40x11008xf32> + %3599 = tosa.sigmoid %3598 : (tensor<1x40x11008xf32>) -> tensor<1x40x11008xf32> + %3600 = tosa.mul %3598, %3599 {shift = 0 : i8} : (tensor<1x40x11008xf32>, tensor<1x40x11008xf32>) -> tensor<1x40x11008xf32> + %3601 = "tosa.const"() <{value = dense<[1, 0]> : tensor<2xi32>}> : () -> tensor<2xi32> + %3602 = tosa.transpose %arg280, %3601 : (tensor<11008x4096xf32>, tensor<2xi32>) -> tensor<4096x11008xf32> + %3603 = tosa.reshape %3593 {new_shape = array} : (tensor<1x40x4096xf32>) -> tensor<40x4096xf32> + %cst_839 = arith.constant dense<0.000000e+00> : tensor<40x11008xf32> + %3604 = linalg.matmul {cast = #linalg.type_fn} ins(%3603, %3602 : tensor<40x4096xf32>, tensor<4096x11008xf32>) outs(%cst_839 : tensor<40x11008xf32>) -> tensor<40x11008xf32> + %3605 = tosa.reshape %3604 {new_shape = array} : (tensor<40x11008xf32>) -> tensor<1x40x11008xf32> + %3606 = tosa.mul %3600, %3605 {shift = 0 : i8} : (tensor<1x40x11008xf32>, tensor<1x40x11008xf32>) -> tensor<1x40x11008xf32> + %3607 = "tosa.const"() <{value = dense<[1, 0]> : tensor<2xi32>}> : () -> tensor<2xi32> + %3608 = tosa.transpose %arg281, %3607 : (tensor<4096x11008xf32>, tensor<2xi32>) -> tensor<11008x4096xf32> + %3609 = 
tosa.reshape %3606 {new_shape = array} : (tensor<1x40x11008xf32>) -> tensor<40x11008xf32> + %cst_840 = arith.constant dense<0.000000e+00> : tensor<40x4096xf32> + %3610 = linalg.matmul {cast = #linalg.type_fn} ins(%3609, %3608 : tensor<40x11008xf32>, tensor<11008x4096xf32>) outs(%cst_840 : tensor<40x4096xf32>) -> tensor<40x4096xf32> + %3611 = tosa.reshape %3610 {new_shape = array} : (tensor<40x4096xf32>) -> tensor<1x40x4096xf32> + %3612 = tosa.add %3581, %3611 : (tensor<1x40x4096xf32>, tensor<1x40x4096xf32>) -> tensor<1x40x4096xf32> + %3613 = tensor.empty() : tensor<1x40x4096xf32> + %c2_i32_841 = arith.constant 2 : i32 + %3614 = linalg.generic {indexing_maps = [#map4, #map4], iterator_types = ["parallel", "parallel", "parallel"]} ins(%3612 : tensor<1x40x4096xf32>) outs(%3613 : tensor<1x40x4096xf32>) { + ^bb0(%in: f32, %out: f32): + %3745 = math.fpowi %in, %c2_i32_841 : f32, i32 + linalg.yield %3745 : f32 + } -> tensor<1x40x4096xf32> + %3615 = tosa.reduce_sum %3614 {axis = 2 : i32} : (tensor<1x40x4096xf32>) -> tensor<1x40x1xf32> + %3616 = "tosa.const"() <{value = dense<4.096000e+03> : tensor<1xf32>}> : () -> tensor<1xf32> + %3617 = tosa.reciprocal %3616 : (tensor<1xf32>) -> tensor<1xf32> + %3618 = tosa.mul %3617, %3615 {shift = 0 : i8} : (tensor<1xf32>, tensor<1x40x1xf32>) -> tensor<1x40x1xf32> + %3619 = "tosa.const"() <{value = dense<9.99999974E-6> : tensor<1x40x1xf32>}> : () -> tensor<1x40x1xf32> + %3620 = tosa.add %3618, %3619 : (tensor<1x40x1xf32>, tensor<1x40x1xf32>) -> tensor<1x40x1xf32> + %3621 = tosa.rsqrt %3620 : (tensor<1x40x1xf32>) -> tensor<1x40x1xf32> + %3622 = tosa.mul %3612, %3621 {shift = 0 : i8} : (tensor<1x40x4096xf32>, tensor<1x40x1xf32>) -> tensor<1x40x4096xf32> + %3623 = tosa.reshape %arg282 {new_shape = array} : (tensor<4096xf32>) -> tensor<1x1x4096xf32> + %3624 = tosa.mul %3623, %3622 {shift = 0 : i8} : (tensor<1x1x4096xf32>, tensor<1x40x4096xf32>) -> tensor<1x40x4096xf32> + %3625 = "tosa.const"() <{value = dense<[1, 0]> : tensor<2xi32>}> : () 
-> tensor<2xi32> + %3626 = tosa.transpose %arg283, %3625 : (tensor<4096x4096xf32>, tensor<2xi32>) -> tensor<4096x4096xf32> + %3627 = tosa.reshape %3624 {new_shape = array} : (tensor<1x40x4096xf32>) -> tensor<40x4096xf32> + %cst_842 = arith.constant dense<0.000000e+00> : tensor<40x4096xf32> + %3628 = linalg.matmul {cast = #linalg.type_fn} ins(%3627, %3626 : tensor<40x4096xf32>, tensor<4096x4096xf32>) outs(%cst_842 : tensor<40x4096xf32>) -> tensor<40x4096xf32> + %3629 = tosa.reshape %3628 {new_shape = array} : (tensor<40x4096xf32>) -> tensor<1x40x4096xf32> + %3630 = "tosa.const"() <{value = dense<[1, 0]> : tensor<2xi32>}> : () -> tensor<2xi32> + %3631 = tosa.transpose %arg284, %3630 : (tensor<4096x4096xf32>, tensor<2xi32>) -> tensor<4096x4096xf32> + %3632 = tosa.reshape %3624 {new_shape = array} : (tensor<1x40x4096xf32>) -> tensor<40x4096xf32> + %cst_843 = arith.constant dense<0.000000e+00> : tensor<40x4096xf32> + %3633 = linalg.matmul {cast = #linalg.type_fn} ins(%3632, %3631 : tensor<40x4096xf32>, tensor<4096x4096xf32>) outs(%cst_843 : tensor<40x4096xf32>) -> tensor<40x4096xf32> + %3634 = tosa.reshape %3633 {new_shape = array} : (tensor<40x4096xf32>) -> tensor<1x40x4096xf32> + %3635 = "tosa.const"() <{value = dense<[1, 0]> : tensor<2xi32>}> : () -> tensor<2xi32> + %3636 = tosa.transpose %arg285, %3635 : (tensor<4096x4096xf32>, tensor<2xi32>) -> tensor<4096x4096xf32> + %3637 = tosa.reshape %3624 {new_shape = array} : (tensor<1x40x4096xf32>) -> tensor<40x4096xf32> + %cst_844 = arith.constant dense<0.000000e+00> : tensor<40x4096xf32> + %3638 = linalg.matmul {cast = #linalg.type_fn} ins(%3637, %3636 : tensor<40x4096xf32>, tensor<4096x4096xf32>) outs(%cst_844 : tensor<40x4096xf32>) -> tensor<40x4096xf32> + %3639 = tosa.reshape %3638 {new_shape = array} : (tensor<40x4096xf32>) -> tensor<1x40x4096xf32> + %3640 = tosa.reshape %3629 {new_shape = array} : (tensor<1x40x4096xf32>) -> tensor<1x40x32x128xf32> + %3641 = "tosa.const"() <{value = dense<[0, 2, 1, 3]> : 
tensor<4xi32>}> : () -> tensor<4xi32> + %3642 = tosa.transpose %3640, %3641 : (tensor<1x40x32x128xf32>, tensor<4xi32>) -> tensor<1x32x40x128xf32> + %3643 = tosa.reshape %3634 {new_shape = array} : (tensor<1x40x4096xf32>) -> tensor<1x40x32x128xf32> + %3644 = "tosa.const"() <{value = dense<[0, 2, 1, 3]> : tensor<4xi32>}> : () -> tensor<4xi32> + %3645 = tosa.transpose %3643, %3644 : (tensor<1x40x32x128xf32>, tensor<4xi32>) -> tensor<1x32x40x128xf32> + %3646 = tosa.reshape %3639 {new_shape = array} : (tensor<1x40x4096xf32>) -> tensor<1x40x32x128xf32> + %3647 = "tosa.const"() <{value = dense<[0, 2, 1, 3]> : tensor<4xi32>}> : () -> tensor<4xi32> + %3648 = tosa.transpose %3646, %3647 : (tensor<1x40x32x128xf32>, tensor<4xi32>) -> tensor<1x32x40x128xf32> + %3649 = tosa.reshape %45 {new_shape = array} : (tensor<1x40x128xf32>) -> tensor<1x1x40x128xf32> + %3650 = tosa.reshape %47 {new_shape = array} : (tensor<1x40x128xf32>) -> tensor<1x1x40x128xf32> + %3651 = tosa.mul %3642, %3649 {shift = 0 : i8} : (tensor<1x32x40x128xf32>, tensor<1x1x40x128xf32>) -> tensor<1x32x40x128xf32> + %extracted_slice_845 = tensor.extract_slice %3642[0, 0, 0, 0] [1, 32, 40, 64] [1, 1, 1, 1] : tensor<1x32x40x128xf32> to tensor<1x32x40x64xf32> + %extracted_slice_846 = tensor.extract_slice %3642[0, 0, 0, 64] [1, 32, 40, 64] [1, 1, 1, 1] : tensor<1x32x40x128xf32> to tensor<1x32x40x64xf32> + %3652 = tensor.empty() : tensor<1x32x40x64xf32> + %3653 = linalg.negf ins(%extracted_slice_846 : tensor<1x32x40x64xf32>) outs(%3652 : tensor<1x32x40x64xf32>) -> tensor<1x32x40x64xf32> + %3654 = tensor.empty() : tensor<1x32x40x128xf32> + %inserted_slice_847 = tensor.insert_slice %3653 into %3654[0, 0, 0, 0] [1, 32, 40, 64] [1, 1, 1, 1] : tensor<1x32x40x64xf32> into tensor<1x32x40x128xf32> + %inserted_slice_848 = tensor.insert_slice %extracted_slice_845 into %inserted_slice_847[0, 0, 0, 64] [1, 32, 40, 64] [1, 1, 1, 1] : tensor<1x32x40x64xf32> into tensor<1x32x40x128xf32> + %3655 = tosa.mul %inserted_slice_848, %3650 
{shift = 0 : i8} : (tensor<1x32x40x128xf32>, tensor<1x1x40x128xf32>) -> tensor<1x32x40x128xf32> + %3656 = tosa.add %3651, %3655 : (tensor<1x32x40x128xf32>, tensor<1x32x40x128xf32>) -> tensor<1x32x40x128xf32> + %3657 = tosa.mul %3645, %3649 {shift = 0 : i8} : (tensor<1x32x40x128xf32>, tensor<1x1x40x128xf32>) -> tensor<1x32x40x128xf32> + %extracted_slice_849 = tensor.extract_slice %3645[0, 0, 0, 0] [1, 32, 40, 64] [1, 1, 1, 1] : tensor<1x32x40x128xf32> to tensor<1x32x40x64xf32> + %extracted_slice_850 = tensor.extract_slice %3645[0, 0, 0, 64] [1, 32, 40, 64] [1, 1, 1, 1] : tensor<1x32x40x128xf32> to tensor<1x32x40x64xf32> + %3658 = tensor.empty() : tensor<1x32x40x64xf32> + %3659 = linalg.negf ins(%extracted_slice_850 : tensor<1x32x40x64xf32>) outs(%3658 : tensor<1x32x40x64xf32>) -> tensor<1x32x40x64xf32> + %3660 = tensor.empty() : tensor<1x32x40x128xf32> + %inserted_slice_851 = tensor.insert_slice %3659 into %3660[0, 0, 0, 0] [1, 32, 40, 64] [1, 1, 1, 1] : tensor<1x32x40x64xf32> into tensor<1x32x40x128xf32> + %inserted_slice_852 = tensor.insert_slice %extracted_slice_849 into %inserted_slice_851[0, 0, 0, 64] [1, 32, 40, 64] [1, 1, 1, 1] : tensor<1x32x40x64xf32> into tensor<1x32x40x128xf32> + %3661 = tosa.mul %inserted_slice_852, %3650 {shift = 0 : i8} : (tensor<1x32x40x128xf32>, tensor<1x1x40x128xf32>) -> tensor<1x32x40x128xf32> + %3662 = tosa.add %3657, %3661 : (tensor<1x32x40x128xf32>, tensor<1x32x40x128xf32>) -> tensor<1x32x40x128xf32> + %3663 = tosa.reshape %19 {new_shape = array} : (tensor<40x41xf32>) -> tensor<1x40x41xf32> + %3664 = tosa.reshape %3663 {new_shape = array} : (tensor<1x40x41xf32>) -> tensor<1x1x40x41xf32> + %extracted_slice_853 = tensor.extract_slice %3664[0, 0, 0, 0] [1, 1, 40, 41] [1, 1, 1, 1] : tensor<1x1x40x41xf32> to tensor<1x1x40x41xf32> + %extracted_slice_854 = tensor.extract_slice %extracted_slice_853[0, 0, 0, 0] [1, 1, 40, 41] [1, 1, 1, 1] : tensor<1x1x40x41xf32> to tensor<1x1x40x41xf32> + %3665 = "tosa.const"() <{value = 
dense<0.000000e+00> : tensor<1x1x40x41xf32>}> : () -> tensor<1x1x40x41xf32> + %3666 = tosa.add %extracted_slice_854, %3665 : (tensor<1x1x40x41xf32>, tensor<1x1x40x41xf32>) -> tensor<1x1x40x41xf32> + %extracted_slice_855 = tensor.extract_slice %3666[0, 0, 0, 0] [1, 1, 40, 41] [1, 1, 1, 1] : tensor<1x1x40x41xf32> to tensor<1x1x40x41xf32> + %extracted_slice_856 = tensor.extract_slice %extracted_slice_855[0, 0, 0, 0] [1, 1, 40, 41] [1, 1, 1, 1] : tensor<1x1x40x41xf32> to tensor<1x1x40x41xf32> + %extracted_slice_857 = tensor.extract_slice %extracted_slice_856[0, 0, 0, 0] [1, 1, 40, 41] [1, 1, 1, 1] : tensor<1x1x40x41xf32> to tensor<1x1x40x41xf32> + %extracted_slice_858 = tensor.extract_slice %extracted_slice_857[0, 0, 0, 0] [1, 1, 40, 40] [1, 1, 1, 1] : tensor<1x1x40x41xf32> to tensor<1x1x40x40xf32> + %cst_859 = arith.constant 0.000000e+00 : f32 + %splat_860 = tensor.splat %cst_859 : tensor<40x40xf32> + %3667 = tosa.reshape %extracted_slice_858 {new_shape = array} : (tensor<1x1x40x40xf32>) -> tensor<40x40xf32> + %3668 = tosa.add %splat_860, %3667 : (tensor<40x40xf32>, tensor<40x40xf32>) -> tensor<40x40xf32> + %3669 = "tosa.const"() <{value = dense<[0, 1, 3, 2]> : tensor<4xi32>}> : () -> tensor<4xi32> + %3670 = tosa.transpose %3662, %3669 : (tensor<1x32x40x128xf32>, tensor<4xi32>) -> tensor<1x32x128x40xf32> + %3671 = tosa.reshape %3656 {new_shape = array} : (tensor<1x32x40x128xf32>) -> tensor<32x40x128xf32> + %3672 = tosa.reshape %3670 {new_shape = array} : (tensor<1x32x128x40xf32>) -> tensor<32x128x40xf32> + %3673 = tosa.matmul %3671, %3672 : (tensor<32x40x128xf32>, tensor<32x128x40xf32>) -> tensor<32x40x40xf32> + %cst_861 = arith.constant 0.0883883461 : f32 + %splat_862 = tensor.splat %cst_861 : tensor<32x40x40xf32> + %3674 = tosa.mul %3673, %splat_862 {shift = 0 : i8} : (tensor<32x40x40xf32>, tensor<32x40x40xf32>) -> tensor<32x40x40xf32> + %3675 = tosa.add %3674, %3668 : (tensor<32x40x40xf32>, tensor<40x40xf32>) -> tensor<32x40x40xf32> + %3676 = tosa.reduce_max %3675 
{axis = 2 : i32} : (tensor<32x40x40xf32>) -> tensor<32x40x1xf32> + %3677 = tosa.sub %3675, %3676 : (tensor<32x40x40xf32>, tensor<32x40x1xf32>) -> tensor<32x40x40xf32> + %3678 = math.exp %3677 : tensor<32x40x40xf32> + %3679 = tosa.reduce_sum %3678 {axis = 2 : i32} : (tensor<32x40x40xf32>) -> tensor<32x40x1xf32> + %3680 = tosa.log %3679 : (tensor<32x40x1xf32>) -> tensor<32x40x1xf32> + %3681 = tosa.add %3676, %3680 : (tensor<32x40x1xf32>, tensor<32x40x1xf32>) -> tensor<32x40x1xf32> + %3682 = tosa.sub %3675, %3681 : (tensor<32x40x40xf32>, tensor<32x40x1xf32>) -> tensor<32x40x40xf32> + %3683 = math.exp %3682 : tensor<32x40x40xf32> + %3684 = tosa.reshape %3681 {new_shape = array} : (tensor<32x40x1xf32>) -> tensor<1x32x40xf32> + %3685 = tosa.reshape %3648 {new_shape = array} : (tensor<1x32x40x128xf32>) -> tensor<32x40x128xf32> + %3686 = tosa.matmul %3683, %3685 : (tensor<32x40x40xf32>, tensor<32x40x128xf32>) -> tensor<32x40x128xf32> + %3687 = tosa.reshape %3686 {new_shape = array} : (tensor<32x40x128xf32>) -> tensor<1x32x40x128xf32> + %3688 = "tosa.const"() <{value = dense<[0, 2, 1, 3]> : tensor<4xi32>}> : () -> tensor<4xi32> + %3689 = tosa.transpose %3687, %3688 : (tensor<1x32x40x128xf32>, tensor<4xi32>) -> tensor<1x40x32x128xf32> + %3690 = tosa.reshape %3689 {new_shape = array} : (tensor<1x40x32x128xf32>) -> tensor<1x40x4096xf32> + %3691 = "tosa.const"() <{value = dense<[1, 0]> : tensor<2xi32>}> : () -> tensor<2xi32> + %3692 = tosa.transpose %arg286, %3691 : (tensor<4096x4096xf32>, tensor<2xi32>) -> tensor<4096x4096xf32> + %3693 = tosa.reshape %3690 {new_shape = array} : (tensor<1x40x4096xf32>) -> tensor<40x4096xf32> + %cst_863 = arith.constant dense<0.000000e+00> : tensor<40x4096xf32> + %3694 = linalg.matmul {cast = #linalg.type_fn} ins(%3693, %3692 : tensor<40x4096xf32>, tensor<4096x4096xf32>) outs(%cst_863 : tensor<40x4096xf32>) -> tensor<40x4096xf32> + %3695 = tosa.reshape %3694 {new_shape = array} : (tensor<40x4096xf32>) -> tensor<1x40x4096xf32> + %3696 = tosa.add 
%3612, %3695 : (tensor<1x40x4096xf32>, tensor<1x40x4096xf32>) -> tensor<1x40x4096xf32> + %3697 = tensor.empty() : tensor<1x40x4096xf32> + %c2_i32_864 = arith.constant 2 : i32 + %3698 = linalg.generic {indexing_maps = [#map4, #map4], iterator_types = ["parallel", "parallel", "parallel"]} ins(%3696 : tensor<1x40x4096xf32>) outs(%3697 : tensor<1x40x4096xf32>) { + ^bb0(%in: f32, %out: f32): + %3745 = math.fpowi %in, %c2_i32_864 : f32, i32 + linalg.yield %3745 : f32 + } -> tensor<1x40x4096xf32> + %3699 = tosa.reduce_sum %3698 {axis = 2 : i32} : (tensor<1x40x4096xf32>) -> tensor<1x40x1xf32> + %3700 = "tosa.const"() <{value = dense<4.096000e+03> : tensor<1xf32>}> : () -> tensor<1xf32> + %3701 = tosa.reciprocal %3700 : (tensor<1xf32>) -> tensor<1xf32> + %3702 = tosa.mul %3701, %3699 {shift = 0 : i8} : (tensor<1xf32>, tensor<1x40x1xf32>) -> tensor<1x40x1xf32> + %3703 = "tosa.const"() <{value = dense<9.99999974E-6> : tensor<1x40x1xf32>}> : () -> tensor<1x40x1xf32> + %3704 = tosa.add %3702, %3703 : (tensor<1x40x1xf32>, tensor<1x40x1xf32>) -> tensor<1x40x1xf32> + %3705 = tosa.rsqrt %3704 : (tensor<1x40x1xf32>) -> tensor<1x40x1xf32> + %3706 = tosa.mul %3696, %3705 {shift = 0 : i8} : (tensor<1x40x4096xf32>, tensor<1x40x1xf32>) -> tensor<1x40x4096xf32> + %3707 = tosa.reshape %arg287 {new_shape = array} : (tensor<4096xf32>) -> tensor<1x1x4096xf32> + %3708 = tosa.mul %3707, %3706 {shift = 0 : i8} : (tensor<1x1x4096xf32>, tensor<1x40x4096xf32>) -> tensor<1x40x4096xf32> + %3709 = "tosa.const"() <{value = dense<[1, 0]> : tensor<2xi32>}> : () -> tensor<2xi32> + %3710 = tosa.transpose %arg288, %3709 : (tensor<11008x4096xf32>, tensor<2xi32>) -> tensor<4096x11008xf32> + %3711 = tosa.reshape %3708 {new_shape = array} : (tensor<1x40x4096xf32>) -> tensor<40x4096xf32> + %cst_865 = arith.constant dense<0.000000e+00> : tensor<40x11008xf32> + %3712 = linalg.matmul {cast = #linalg.type_fn} ins(%3711, %3710 : tensor<40x4096xf32>, tensor<4096x11008xf32>) outs(%cst_865 : tensor<40x11008xf32>) -> 
tensor<40x11008xf32> + %3713 = tosa.reshape %3712 {new_shape = array} : (tensor<40x11008xf32>) -> tensor<1x40x11008xf32> + %3714 = tosa.sigmoid %3713 : (tensor<1x40x11008xf32>) -> tensor<1x40x11008xf32> + %3715 = tosa.mul %3713, %3714 {shift = 0 : i8} : (tensor<1x40x11008xf32>, tensor<1x40x11008xf32>) -> tensor<1x40x11008xf32> + %3716 = "tosa.const"() <{value = dense<[1, 0]> : tensor<2xi32>}> : () -> tensor<2xi32> + %3717 = tosa.transpose %arg289, %3716 : (tensor<11008x4096xf32>, tensor<2xi32>) -> tensor<4096x11008xf32> + %3718 = tosa.reshape %3708 {new_shape = array} : (tensor<1x40x4096xf32>) -> tensor<40x4096xf32> + %cst_866 = arith.constant dense<0.000000e+00> : tensor<40x11008xf32> + %3719 = linalg.matmul {cast = #linalg.type_fn} ins(%3718, %3717 : tensor<40x4096xf32>, tensor<4096x11008xf32>) outs(%cst_866 : tensor<40x11008xf32>) -> tensor<40x11008xf32> + %3720 = tosa.reshape %3719 {new_shape = array} : (tensor<40x11008xf32>) -> tensor<1x40x11008xf32> + %3721 = tosa.mul %3715, %3720 {shift = 0 : i8} : (tensor<1x40x11008xf32>, tensor<1x40x11008xf32>) -> tensor<1x40x11008xf32> + %3722 = "tosa.const"() <{value = dense<[1, 0]> : tensor<2xi32>}> : () -> tensor<2xi32> + %3723 = tosa.transpose %arg290, %3722 : (tensor<4096x11008xf32>, tensor<2xi32>) -> tensor<11008x4096xf32> + %3724 = tosa.reshape %3721 {new_shape = array} : (tensor<1x40x11008xf32>) -> tensor<40x11008xf32> + %cst_867 = arith.constant dense<0.000000e+00> : tensor<40x4096xf32> + %3725 = linalg.matmul {cast = #linalg.type_fn} ins(%3724, %3723 : tensor<40x11008xf32>, tensor<11008x4096xf32>) outs(%cst_867 : tensor<40x4096xf32>) -> tensor<40x4096xf32> + %3726 = tosa.reshape %3725 {new_shape = array} : (tensor<40x4096xf32>) -> tensor<1x40x4096xf32> + %3727 = tosa.add %3696, %3726 : (tensor<1x40x4096xf32>, tensor<1x40x4096xf32>) -> tensor<1x40x4096xf32> + %3728 = tensor.empty() : tensor<1x40x4096xf32> + %c2_i32_868 = arith.constant 2 : i32 + %3729 = linalg.generic {indexing_maps = [#map4, #map4], 
iterator_types = ["parallel", "parallel", "parallel"]} ins(%3727 : tensor<1x40x4096xf32>) outs(%3728 : tensor<1x40x4096xf32>) { + ^bb0(%in: f32, %out: f32): + %3745 = math.fpowi %in, %c2_i32_868 : f32, i32 + linalg.yield %3745 : f32 + } -> tensor<1x40x4096xf32> + %3730 = tosa.reduce_sum %3729 {axis = 2 : i32} : (tensor<1x40x4096xf32>) -> tensor<1x40x1xf32> + %3731 = "tosa.const"() <{value = dense<4.096000e+03> : tensor<1xf32>}> : () -> tensor<1xf32> + %3732 = tosa.reciprocal %3731 : (tensor<1xf32>) -> tensor<1xf32> + %3733 = tosa.mul %3732, %3730 {shift = 0 : i8} : (tensor<1xf32>, tensor<1x40x1xf32>) -> tensor<1x40x1xf32> + %3734 = "tosa.const"() <{value = dense<9.99999974E-6> : tensor<1x40x1xf32>}> : () -> tensor<1x40x1xf32> + %3735 = tosa.add %3733, %3734 : (tensor<1x40x1xf32>, tensor<1x40x1xf32>) -> tensor<1x40x1xf32> + %3736 = tosa.rsqrt %3735 : (tensor<1x40x1xf32>) -> tensor<1x40x1xf32> + %3737 = tosa.mul %3727, %3736 {shift = 0 : i8} : (tensor<1x40x4096xf32>, tensor<1x40x1xf32>) -> tensor<1x40x4096xf32> + %3738 = tosa.reshape %arg291 {new_shape = array} : (tensor<4096xf32>) -> tensor<1x1x4096xf32> + %3739 = tosa.mul %3738, %3737 {shift = 0 : i8} : (tensor<1x1x4096xf32>, tensor<1x40x4096xf32>) -> tensor<1x40x4096xf32> + %extracted_slice_869 = tensor.extract_slice %3739[0, 0, 0] [1, 40, 4096] [1, 1, 1] : tensor<1x40x4096xf32> to tensor<1x40x4096xf32> + %extracted_slice_870 = tensor.extract_slice %extracted_slice_869[0, 0, 0] [1, 40, 4096] [1, 1, 1] : tensor<1x40x4096xf32> to tensor<1x40x4096xf32> + %extracted_slice_871 = tensor.extract_slice %extracted_slice_870[0, 0, 0] [1, 40, 4096] [1, 1, 1] : tensor<1x40x4096xf32> to tensor<1x40x4096xf32> + %3740 = "tosa.const"() <{value = dense<[1, 0]> : tensor<2xi32>}> : () -> tensor<2xi32> + %3741 = tosa.transpose %arg292, %3740 : (tensor<32000x4096xf32>, tensor<2xi32>) -> tensor<4096x32000xf32> + %3742 = tosa.reshape %extracted_slice_871 {new_shape = array} : (tensor<1x40x4096xf32>) -> tensor<40x4096xf32> + %cst_872 = 
arith.constant dense<0.000000e+00> : tensor<40x32000xf32> + %3743 = linalg.matmul {cast = #linalg.type_fn} ins(%3742, %3741 : tensor<40x4096xf32>, tensor<4096x32000xf32>) outs(%cst_872 : tensor<40x32000xf32>) -> tensor<40x32000xf32> + %3744 = tosa.reshape %3743 {new_shape = array} : (tensor<40x32000xf32>) -> tensor<1x40x32000xf32> + return %3744 : tensor<1x40x32000xf32> + } +} + diff --git a/examples/CMakeLists.txt b/examples/CMakeLists.txt index acdbeced..1fc46e48 100644 --- a/examples/CMakeLists.txt +++ b/examples/CMakeLists.txt @@ -12,6 +12,10 @@ if(BUDDY_DIS_LLAMA_EXAMPLES) add_subdirectory(LlamaTest) endif() +if(BUDDY_SPLIT_LLAMA_EXAMPLES) + add_subdirectory(SplitLlama) +endif() + if(BUDDY_DEEPSEEKR1_EXAMPLES) add_subdirectory(BuddyDeepSeekR1) endif() diff --git a/examples/LlamaTest/.gitignore b/examples/LlamaTest/.gitignore index 817e9f3d..48f9a4fc 100644 --- a/examples/LlamaTest/.gitignore +++ b/examples/LlamaTest/.gitignore @@ -1,5 +1,5 @@ # model params file -arg0.data +*.data vocab.txt # model mlir file diff --git a/examples/LlamaTest/CMakeLists.txt b/examples/LlamaTest/CMakeLists.txt index cdb4679c..fd6c795f 100644 --- a/examples/LlamaTest/CMakeLists.txt +++ b/examples/LlamaTest/CMakeLists.txt @@ -3,9 +3,14 @@ add_custom_command( ${CMAKE_CURRENT_BINARY_DIR}/subgraph0.mlir ${CMAKE_CURRENT_BINARY_DIR}/forward1.mlir ${CMAKE_CURRENT_BINARY_DIR}/subgraph1.mlir - ${CMAKE_CURRENT_BINARY_DIR}/forward33.mlir - ${CMAKE_CURRENT_BINARY_DIR}/subgraph33.mlir - ${CMAKE_CURRENT_BINARY_DIR}/arg0.data + ${CMAKE_CURRENT_BINARY_DIR}/forward2.mlir + ${CMAKE_CURRENT_BINARY_DIR}/subgraph2.mlir + ${CMAKE_CURRENT_BINARY_DIR}/forward3.mlir + ${CMAKE_CURRENT_BINARY_DIR}/subgraph3.mlir + ${CMAKE_CURRENT_BINARY_DIR}/forward5.mlir + ${CMAKE_CURRENT_BINARY_DIR}/subgraph5.mlir + ${CMAKE_CURRENT_BINARY_DIR}/forward193.mlir + ${CMAKE_CURRENT_BINARY_DIR}/subgraph193.mlir COMMAND ${Python3_EXECUTABLE} ${CMAKE_CURRENT_SOURCE_DIR}/llama-import.py --output-dir ${CMAKE_CURRENT_BINARY_DIR} 
COMMENT "Generating forward.mlir, subgraph.mlir and arg0.data..." @@ -196,8 +201,8 @@ add_custom_command( add_library(DISLLAMA1 STATIC forward1.o subgraph1.o) add_custom_command( - OUTPUT forward33.o - COMMAND ${LLVM_TOOLS_BINARY_DIR}/mlir-opt ${CMAKE_CURRENT_BINARY_DIR}/forward33.mlir + OUTPUT forward2.o + COMMAND ${LLVM_TOOLS_BINARY_DIR}/mlir-opt ${CMAKE_CURRENT_BINARY_DIR}/forward2.mlir -pass-pipeline "builtin.module(func.func(tosa-to-linalg-named),func.func(tosa-to-linalg),func.func(tosa-to-tensor),func.func(tosa-to-arith))" | ${BUDDY_BINARY_DIR}/buddy-opt -arith-expand @@ -234,14 +239,14 @@ add_custom_command( ${LLVM_TOOLS_BINARY_DIR}/mlir-translate -mlir-to-llvmir | ${LLVM_TOOLS_BINARY_DIR}/llvm-as | ${LLVM_TOOLS_BINARY_DIR}/llc -filetype=obj -relocation-model=pic -O3 - -o ${CMAKE_CURRENT_BINARY_DIR}/forward33.o - DEPENDS buddy-opt ${CMAKE_CURRENT_BINARY_DIR}/forward33.mlir - COMMENT "Building forward33.o " + -o ${CMAKE_CURRENT_BINARY_DIR}/forward2.o + DEPENDS buddy-opt ${CMAKE_CURRENT_BINARY_DIR}/forward2.mlir + COMMENT "Building forward2.o " VERBATIM) add_custom_command( - OUTPUT subgraph33.o - COMMAND ${LLVM_TOOLS_BINARY_DIR}/mlir-opt ${CMAKE_CURRENT_BINARY_DIR}/subgraph33.mlir + OUTPUT subgraph2.o + COMMAND ${LLVM_TOOLS_BINARY_DIR}/mlir-opt ${CMAKE_CURRENT_BINARY_DIR}/subgraph2.mlir -pass-pipeline "builtin.module(func.func(tosa-to-linalg-named),func.func(tosa-to-linalg),func.func(tosa-to-tensor),func.func(tosa-to-arith))" | ${BUDDY_BINARY_DIR}/buddy-opt -convert-elementwise-to-linalg @@ -280,12 +285,288 @@ add_custom_command( ${LLVM_TOOLS_BINARY_DIR}/mlir-translate -mlir-to-llvmir | ${LLVM_TOOLS_BINARY_DIR}/llvm-as | ${LLVM_TOOLS_BINARY_DIR}/llc -filetype=obj -relocation-model=pic -O3 - -o ${CMAKE_CURRENT_BINARY_DIR}/subgraph33.o - DEPENDS buddy-opt ${CMAKE_CURRENT_BINARY_DIR}/subgraph33.mlir - COMMENT "Building subgraph33.o " + -o ${CMAKE_CURRENT_BINARY_DIR}/subgraph2.o + DEPENDS buddy-opt ${CMAKE_CURRENT_BINARY_DIR}/subgraph2.mlir + COMMENT "Building 
subgraph2.o " VERBATIM) -add_library(DISLLAMA2 STATIC forward33.o subgraph33.o) +add_library(DISLLAMA2 STATIC forward2.o subgraph2.o) + +add_custom_command( + OUTPUT forward3.o + COMMAND ${LLVM_TOOLS_BINARY_DIR}/mlir-opt ${CMAKE_CURRENT_BINARY_DIR}/forward3.mlir + -pass-pipeline "builtin.module(func.func(tosa-to-linalg-named),func.func(tosa-to-linalg),func.func(tosa-to-tensor),func.func(tosa-to-arith))" | + ${BUDDY_BINARY_DIR}/buddy-opt + -arith-expand + -eliminate-empty-tensors + -empty-tensor-to-alloc-tensor + -one-shot-bufferize + -matmul-parallel-vectorization-optimize + -batchmatmul-optimize + -convert-linalg-to-affine-loops + -affine-loop-fusion + -affine-parallelize + -lower-affine + -convert-scf-to-openmp + -func-bufferize + -arith-bufferize + -tensor-bufferize + -buffer-deallocation + -finalizing-bufferize + -convert-vector-to-scf + -expand-strided-metadata + -convert-vector-to-llvm + -memref-expand + -arith-expand + -convert-arith-to-llvm + -finalize-memref-to-llvm + -convert-scf-to-cf + -llvm-request-c-wrappers + -convert-openmp-to-llvm + -convert-arith-to-llvm + -convert-math-to-llvm + -convert-math-to-libm + -convert-func-to-llvm + -reconcile-unrealized-casts | + ${LLVM_TOOLS_BINARY_DIR}/mlir-translate -mlir-to-llvmir | + ${LLVM_TOOLS_BINARY_DIR}/llvm-as | + ${LLVM_TOOLS_BINARY_DIR}/llc -filetype=obj -relocation-model=pic -O3 + -o ${CMAKE_CURRENT_BINARY_DIR}/forward3.o + DEPENDS buddy-opt ${CMAKE_CURRENT_BINARY_DIR}/forward3.mlir + COMMENT "Building forward3.o " + VERBATIM) + +add_custom_command( + OUTPUT subgraph3.o + COMMAND ${LLVM_TOOLS_BINARY_DIR}/mlir-opt ${CMAKE_CURRENT_BINARY_DIR}/subgraph3.mlir + -pass-pipeline "builtin.module(func.func(tosa-to-linalg-named),func.func(tosa-to-linalg),func.func(tosa-to-tensor),func.func(tosa-to-arith))" | + ${BUDDY_BINARY_DIR}/buddy-opt + -convert-elementwise-to-linalg + -arith-expand + -eliminate-empty-tensors + -empty-tensor-to-alloc-tensor + -one-shot-bufferize + -func-bufferize-dynamic-offset + 
-tensor-bufferize + -arith-bufferize + -buffer-deallocation + -finalizing-bufferize + -matmul-parallel-vectorization-optimize + -batchmatmul-optimize + -convert-linalg-to-affine-loops + -affine-loop-fusion + -affine-parallelize + -lower-affine + -convert-scf-to-openmp + -convert-vector-to-scf + -expand-strided-metadata + -cse + -convert-vector-to-llvm + -memref-expand + -arith-expand + -convert-arith-to-llvm + -finalize-memref-to-llvm + -convert-scf-to-cf + -llvm-request-c-wrappers + -convert-openmp-to-llvm + -convert-arith-to-llvm + -convert-math-to-llvm + -convert-math-to-libm + -convert-func-to-llvm + -reconcile-unrealized-casts | + ${LLVM_TOOLS_BINARY_DIR}/mlir-translate -mlir-to-llvmir | + ${LLVM_TOOLS_BINARY_DIR}/llvm-as | + ${LLVM_TOOLS_BINARY_DIR}/llc -filetype=obj -relocation-model=pic -O3 + -o ${CMAKE_CURRENT_BINARY_DIR}/subgraph3.o + DEPENDS buddy-opt ${CMAKE_CURRENT_BINARY_DIR}/subgraph3.mlir + COMMENT "Building subgraph3.o " + VERBATIM) + +add_library(DISLLAMA3 STATIC forward3.o subgraph3.o) + +add_custom_command( + OUTPUT forward5.o + COMMAND ${LLVM_TOOLS_BINARY_DIR}/mlir-opt ${CMAKE_CURRENT_BINARY_DIR}/forward5.mlir + -pass-pipeline "builtin.module(func.func(tosa-to-linalg-named),func.func(tosa-to-linalg),func.func(tosa-to-tensor),func.func(tosa-to-arith))" | + ${BUDDY_BINARY_DIR}/buddy-opt + -arith-expand + -eliminate-empty-tensors + -empty-tensor-to-alloc-tensor + -one-shot-bufferize + -matmul-parallel-vectorization-optimize + -batchmatmul-optimize + -convert-linalg-to-affine-loops + -affine-loop-fusion + -affine-parallelize + -lower-affine + -convert-scf-to-openmp + -func-bufferize + -arith-bufferize + -tensor-bufferize + -buffer-deallocation + -finalizing-bufferize + -convert-vector-to-scf + -expand-strided-metadata + -convert-vector-to-llvm + -memref-expand + -arith-expand + -convert-arith-to-llvm + -finalize-memref-to-llvm + -convert-scf-to-cf + -llvm-request-c-wrappers + -convert-openmp-to-llvm + -convert-arith-to-llvm + -convert-math-to-llvm 
+ -convert-math-to-libm + -convert-func-to-llvm + -reconcile-unrealized-casts | + ${LLVM_TOOLS_BINARY_DIR}/mlir-translate -mlir-to-llvmir | + ${LLVM_TOOLS_BINARY_DIR}/llvm-as | + ${LLVM_TOOLS_BINARY_DIR}/llc -filetype=obj -relocation-model=pic -O3 + -o ${CMAKE_CURRENT_BINARY_DIR}/forward5.o + DEPENDS buddy-opt ${CMAKE_CURRENT_BINARY_DIR}/forward5.mlir + COMMENT "Building forward5.o " + VERBATIM) + +add_custom_command( + OUTPUT subgraph5.o + COMMAND ${LLVM_TOOLS_BINARY_DIR}/mlir-opt ${CMAKE_CURRENT_BINARY_DIR}/subgraph5.mlir + -pass-pipeline "builtin.module(func.func(tosa-to-linalg-named),func.func(tosa-to-linalg),func.func(tosa-to-tensor),func.func(tosa-to-arith))" | + ${BUDDY_BINARY_DIR}/buddy-opt + -convert-elementwise-to-linalg + -arith-expand + -eliminate-empty-tensors + -empty-tensor-to-alloc-tensor + -one-shot-bufferize + -func-bufferize-dynamic-offset + -tensor-bufferize + -arith-bufferize + -buffer-deallocation + -finalizing-bufferize + -matmul-parallel-vectorization-optimize + -batchmatmul-optimize + -convert-linalg-to-affine-loops + -affine-loop-fusion + -affine-parallelize + -lower-affine + -convert-scf-to-openmp + -convert-vector-to-scf + -expand-strided-metadata + -cse + -convert-vector-to-llvm + -memref-expand + -arith-expand + -convert-arith-to-llvm + -finalize-memref-to-llvm + -convert-scf-to-cf + -llvm-request-c-wrappers + -convert-openmp-to-llvm + -convert-arith-to-llvm + -convert-math-to-llvm + -convert-math-to-libm + -convert-func-to-llvm + -reconcile-unrealized-casts | + ${LLVM_TOOLS_BINARY_DIR}/mlir-translate -mlir-to-llvmir | + ${LLVM_TOOLS_BINARY_DIR}/llvm-as | + ${LLVM_TOOLS_BINARY_DIR}/llc -filetype=obj -relocation-model=pic -O3 + -o ${CMAKE_CURRENT_BINARY_DIR}/subgraph5.o + DEPENDS buddy-opt ${CMAKE_CURRENT_BINARY_DIR}/subgraph5.mlir + COMMENT "Building subgraph5.o " + VERBATIM) + +add_library(DISLLAMA4 STATIC forward5.o subgraph5.o) + +add_custom_command( + OUTPUT forward193.o + COMMAND ${LLVM_TOOLS_BINARY_DIR}/mlir-opt 
${CMAKE_CURRENT_BINARY_DIR}/forward193.mlir + -pass-pipeline "builtin.module(func.func(tosa-to-linalg-named),func.func(tosa-to-linalg),func.func(tosa-to-tensor),func.func(tosa-to-arith))" | + ${BUDDY_BINARY_DIR}/buddy-opt + -arith-expand + -eliminate-empty-tensors + -empty-tensor-to-alloc-tensor + -one-shot-bufferize + -matmul-parallel-vectorization-optimize + -batchmatmul-optimize + -convert-linalg-to-affine-loops + -affine-loop-fusion + -affine-parallelize + -lower-affine + -convert-scf-to-openmp + -func-bufferize + -arith-bufferize + -tensor-bufferize + -buffer-deallocation + -finalizing-bufferize + -convert-vector-to-scf + -expand-strided-metadata + -convert-vector-to-llvm + -memref-expand + -arith-expand + -convert-arith-to-llvm + -finalize-memref-to-llvm + -convert-scf-to-cf + -llvm-request-c-wrappers + -convert-openmp-to-llvm + -convert-arith-to-llvm + -convert-math-to-llvm + -convert-math-to-libm + -convert-func-to-llvm + -reconcile-unrealized-casts | + ${LLVM_TOOLS_BINARY_DIR}/mlir-translate -mlir-to-llvmir | + ${LLVM_TOOLS_BINARY_DIR}/llvm-as | + ${LLVM_TOOLS_BINARY_DIR}/llc -filetype=obj -relocation-model=pic -O3 + -o ${CMAKE_CURRENT_BINARY_DIR}/forward193.o + DEPENDS buddy-opt ${CMAKE_CURRENT_BINARY_DIR}/forward193.mlir + COMMENT "Building forward193.o " + VERBATIM) + +add_custom_command( + OUTPUT subgraph193.o + COMMAND ${LLVM_TOOLS_BINARY_DIR}/mlir-opt ${CMAKE_CURRENT_BINARY_DIR}/subgraph193.mlir + -pass-pipeline "builtin.module(func.func(tosa-to-linalg-named),func.func(tosa-to-linalg),func.func(tosa-to-tensor),func.func(tosa-to-arith))" | + ${BUDDY_BINARY_DIR}/buddy-opt + -convert-elementwise-to-linalg + -arith-expand + -eliminate-empty-tensors + -empty-tensor-to-alloc-tensor + -one-shot-bufferize + -func-bufferize-dynamic-offset + -tensor-bufferize + -arith-bufferize + -buffer-deallocation + -finalizing-bufferize + -matmul-parallel-vectorization-optimize + -batchmatmul-optimize + -convert-linalg-to-affine-loops + -affine-loop-fusion + 
-affine-parallelize + -lower-affine + -convert-scf-to-openmp + -convert-vector-to-scf + -expand-strided-metadata + -cse + -convert-vector-to-llvm + -memref-expand + -arith-expand + -convert-arith-to-llvm + -finalize-memref-to-llvm + -convert-scf-to-cf + -llvm-request-c-wrappers + -convert-openmp-to-llvm + -convert-arith-to-llvm + -convert-math-to-llvm + -convert-math-to-libm + -convert-func-to-llvm + -reconcile-unrealized-casts | + ${LLVM_TOOLS_BINARY_DIR}/mlir-translate -mlir-to-llvmir | + ${LLVM_TOOLS_BINARY_DIR}/llvm-as | + ${LLVM_TOOLS_BINARY_DIR}/llc -filetype=obj -relocation-model=pic -O3 + -o ${CMAKE_CURRENT_BINARY_DIR}/subgraph193.o + DEPENDS buddy-opt ${CMAKE_CURRENT_BINARY_DIR}/subgraph193.mlir + COMMENT "Building subgraph193.o " + VERBATIM) + +add_library(DISLLAMA5 STATIC forward193.o subgraph193.o) SET_SOURCE_FILES_PROPERTIES( template.o @@ -297,6 +578,9 @@ SET_TARGET_PROPERTIES( DISLLAMA0 DISLLAMA1 DISLLAMA2 + DISLLAMA3 + DISLLAMA4 + DISLLAMA5 PROPERTIES LINKER_LANGUAGE C) @@ -316,6 +600,9 @@ set(BUDDY_DIS_LLAMA_LIBS DISLLAMA0 DISLLAMA1 DISLLAMA2 + DISLLAMA3 + DISLLAMA4 + DISLLAMA5 mlir_c_runner_utils omp ) diff --git a/examples/LlamaTest/llama-import.py b/examples/LlamaTest/llama-import.py index 93bec8df..552fc9dc 100644 --- a/examples/LlamaTest/llama-import.py +++ b/examples/LlamaTest/llama-import.py @@ -1,4 +1,4 @@ -# ===- import-llama2.py -------------------------------------------------------- +# ===- llama-import.py -------------------------------------------------------- # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. 
@@ -99,45 +99,17 @@ def forward(self, input_ids, attention_mask, position_ids): assert len(graphs) == 1 graph = graphs[0] params = dynamo_compiler.imported_params[graph] -# pattern_list = [simply_fuse] -# graphs[0].fuse_ops(pattern_list) -graphs[0].init_op_group() -# for key, op_list in graphs[0].op_groups.items(): -# if op_list: # 确保列表非空 -# first_op = op_list[0] -# print(f"Key: {key}, First Op: {first_op}, op_count: {len(op_list)}") -# else: -# print(f"Key: {key} has an empty list.") driver = GraphDriver(graphs[0]) for i in range(len(driver.subgraphs)): driver.subgraphs[i].lower_to_top_level_ir() - -# for op in driver._subgraphs_outputs.values(): -# print(f"Op: {op}") driver.construct_main_graph(True) # Save the generated files to the specified output directory. -for i in range(len(driver.modules)): +for i in range(len(driver.subgraphs)): with open(os.path.join(output_dir, f"subgraph{i}.mlir"), "w") as module_file: print(driver.subgraphs[i]._imported_module, file=module_file) with open(os.path.join(output_dir, f"forward{i}.mlir"), "w") as module_file: print(driver.modules[i], file=module_file) - # 从 GraphDriver 中获取该子图收集到的参数索引列表 - param_indices = driver.subgraph_param_indices[i] - - # 根据参数索引从 loaded_params 中提取参数,并拼接为一维数组 - selected_arrays = [] - for idx in param_indices: - # 注意:loaded_params 中的每个参数都是一个 tensor - arr = params[idx].detach().cpu().numpy().reshape(-1) - selected_arrays.append(arr) - - if selected_arrays: - concat_arr = numpy.concatenate(selected_arrays) - else: - concat_arr = numpy.array([]) - - # 定义输出文件名,数字 i 与子图对应 - filename = os.path.join(output_dir, f"arg{i}.data") - concat_arr.tofile(filename) +for entry in driver._subgraph_param_info.items(): + driver.construct_sub_params(params, entry, output_dir) diff --git a/examples/LlamaTest/llama-main.cpp b/examples/LlamaTest/llama-main.cpp index 906304e9..3fb2b124 100644 --- a/examples/LlamaTest/llama-main.cpp +++ b/examples/LlamaTest/llama-main.cpp @@ -17,23 +17,21 @@ #include #include #include +#include 
#include +#include #include #include -#include -#include #include +#include #include #include -#include using namespace buddy; -constexpr size_t ParamsSize0 = 131072064; -constexpr size_t ParamsSize1 = 202383360; -constexpr size_t ParamsSize2 = 131076096; constexpr size_t MaxVocabSize = 32000; constexpr size_t MaxTokenLength = 40; +constexpr size_t SubMaxTokenLength = 20; constexpr size_t HiddenSize = 4096; constexpr size_t HiddenSize0 = 128; constexpr size_t HiddenSize1 = 41; @@ -44,7 +42,8 @@ struct MemRefContainer { MemRef memRef3D1; MemRef memRef3D2; - MemRefContainer(MemRef m1, MemRef m2, MemRef m3, MemRef m4) + MemRefContainer(MemRef m1, MemRef m2, MemRef m3, + MemRef m4) : memRef3D0(m1), memRef2D(m2), memRef3D1(m3), memRef3D2(m4) {} }; @@ -54,21 +53,16 @@ struct MemRefContainer { extern "C" void _mlir_ciface_forward0(MemRefContainer *, MemRef *, Text *); extern "C" void _mlir_ciface_forward1(MemRef *, MemRef *, - MemRef *, - MemRef *, - MemRef *, - MemRef *); -extern "C" void _mlir_ciface_forward33(MemRef *, MemRef *, MemRef *); -// extern "C" void _mlir_ciface_forward3(MemRef *, MemRef *, -// MemRef *); -// extern "C" void _mlir_ciface_forward4(MemRef *, MemRef *, -// MemRef *); -// extern "C" void _mlir_ciface_forward5(MemRef *, MemRef *, -// MemRef *); -// ----------------------------------------------------------------------------- -// Helper Functions -// ----------------------------------------------------------------------------- +extern "C" void _mlir_ciface_forward2(MemRef *, MemRef *, + MemRef *, MemRef *, + MemRef *, MemRef *); +extern "C" void _mlir_ciface_forward3(MemRef *, MemRef *, + MemRef *); +extern "C" void _mlir_ciface_forward5(MemRef *, MemRef *, + MemRef *); +extern "C" void _mlir_ciface_forward193(MemRef *, MemRef *, + MemRef *); /// Capture input message. 
void getUserInput(std::string &inputStr) { @@ -110,6 +104,7 @@ void loadParameters(const std::string ¶mFilePath, const auto loadStart = std::chrono::high_resolution_clock::now(); std::ifstream paramFile(paramFilePath, std::ios::in | std::ios::binary); if (!paramFile.is_open()) { + std::cout << paramFilePath << std::endl; throw std::runtime_error("[Error] Failed to open params file!"); } printLogLabel(); @@ -146,21 +141,50 @@ int main() { const std::string title = "LLaMA 2 Inference Powered by Buddy Compiler"; std::cout << "\033[33;1m" << title << "\033[0m" << std::endl; + int split_group[] = { + 1, 1, 2, 1, 1, 2, 1, 1, 2, 1, 1, 2, 1, 1, 2, 1, 1, 2, 1, 1, 2, 1, 1, 2, 1, + 1, 2, 1, 1, 2, 1, 1, 2, 1, 1, 2, 1, 1, 2, 1, 1, 2, 1, 1, 2, 1, 1, 2, 1, 1, + 2, 1, 1, 2, 1, 1, 2, 1, 1, 2, 1, 1, 2, 1, 1, 2, 1, 1, 2, 1, 1, 2, 1, 1, 2, + 1, 1, 2, 1, 1, 2, 1, 1, 2, 1, 1, 2, 1, 1, 2, 1, 1, 2, 1, 1, 2, 1, 1, 2, 1, + 1, 2, 1, 1, 2, 1, 1, 2, 1, 1, 2, 1, 1, 2, 1, 1, 2, 1, 1, 2, 1, 1, 2, 1, 1, + 2, 1, 1, 2, 1, 1, 2, 1, 1, 2, 1, 1, 2, 1, 1, 2, 1, 1, 2, 1, 1, 2, 1, 1, 2, + 1, 1, 2, 1, 1, 2, 1, 1, 2, 1, 1, 2, 1, 1, 2, 1, 1, 2, 1, 1, 2, 1, 1, 2, 1, + 1, 2, 1, 1, 2, 1, 1, 2, 1, 1, 2, 1, 1, 2, 1, 1, 2, 1, 1}; + constexpr size_t param_size_group[] = { + 131072064, 4096, 33554432, 0, 4096, 67633152, 0, 4096, 33554432, + 0, 4096, 67633152, 0, 4096, 33554432, 0, 4096, 67633152, + 0, 4096, 33554432, 0, 4096, 67633152, 0, 4096, 33554432, + 0, 4096, 67633152, 0, 4096, 33554432, 0, 4096, 67633152, + 0, 4096, 33554432, 0, 4096, 67633152, 0, 4096, 33554432, + 0, 4096, 67633152, 0, 4096, 33554432, 0, 4096, 67633152, + 0, 4096, 33554432, 0, 4096, 67633152, 0, 4096, 33554432, + 0, 4096, 67633152, 0, 4096, 33554432, 0, 4096, 67633152, + 0, 4096, 33554432, 0, 4096, 67633152, 0, 4096, 33554432, + 0, 4096, 67633152, 0, 4096, 33554432, 0, 4096, 67633152, + 0, 4096, 33554432, 0, 4096, 67633152, 0, 4096, 33554432, + 0, 4096, 67633152, 0, 4096, 33554432, 0, 4096, 67633152, + 0, 4096, 33554432, 0, 4096, 67633152, 
0, 4096, 33554432, + 0, 4096, 67633152, 0, 4096, 33554432, 0, 4096, 67633152, + 0, 4096, 33554432, 0, 4096, 67633152, 0, 4096, 33554432, + 0, 4096, 67633152, 0, 4096, 33554432, 0, 4096, 67633152, + 0, 4096, 33554432, 0, 4096, 67633152, 0, 4096, 33554432, + 0, 4096, 67633152, 0, 4096, 33554432, 0, 4096, 67633152, + 0, 4096, 33554432, 0, 4096, 67633152, 0, 4096, 33554432, + 0, 4096, 67633152, 0, 4096, 33554432, 0, 4096, 67633152, + 0, 4096, 33554432, 0, 4096, 67633152, 0, 4096, 33554432, + 0, 4096, 67633152, 0, 131076096}; /// Define directories of vacabulary and parameter file. std::string llamaDir = LLAMA_DIS_EXAMPLE_PATH; std::string llamaBuildDir = LLAMA_EXAMPLE_BUILD_PATH; const std::string vocabDir = llamaDir + "/vocab.txt"; - // const std::string paramsDir0 = llamaBuildDir + "/arg0.data"; - // const std::string paramsDir1 = llamaBuildDir + "/arg1.data"; - // const std::string paramsDir2 = llamaBuildDir + "/arg33.data"; + std::vector paramsDirs; // 用容器存储路径 - std::vector paramsDirs; // 用容器存储路径 - - for (int i = 0; i < 34; i++) { // N 为需要生成的数量 + for (int i = 0; i < 194; i++) { // N 为需要生成的数量 + for (int j = 0; j < split_group[i]; j++) { // 使用 emplace_back 直接构造字符串,避免拷贝 - paramsDirs.emplace_back( - llamaBuildDir + "/arg" + std::to_string(i) + ".data" - ); + paramsDirs.emplace_back(llamaBuildDir + "/subgraph" + std::to_string(i) + + "_arg" + std::to_string(j) + ".data"); + } } /// Get user message. 
@@ -179,25 +203,41 @@ int main() { MemRefContainer resultContainer(myMemRef1, myMemRef2, myMemRef3, myMemRef4); MemRefContainer *resultContainerPtr = &resultContainer; MemRef resultContainer0({1, MaxTokenLength, HiddenSize}); + MemRef subResultContainer0({1, SubMaxTokenLength, HiddenSize}); + MemRef subResultContainer1({1, SubMaxTokenLength, HiddenSize}); + MemRef tmp3DContainer({1, MaxTokenLength, HiddenSize}); + MemRef sub3DContainer0({1, SubMaxTokenLength, HiddenSize}); + MemRef sub3DContainer1({1, SubMaxTokenLength, HiddenSize}); + MemRef tmp2DContainer0({MaxTokenLength, HiddenSize}); + MemRef tmp2DContainer1({MaxTokenLength, HiddenSize}); + MemRef sub2DContainer0({SubMaxTokenLength, HiddenSize}); + MemRef sub2DContainer1({SubMaxTokenLength, HiddenSize}); Text inputContainer(inputStr); std::vector> paramsContainers; - MemRef paramsContainer0({ParamsSize0}); - for(int i = 0; i < 32; i++){ - MemRef paramsContainer1({ParamsSize1}); - paramsContainers.push_back(paramsContainer1); - } - MemRef paramsContainer2({ParamsSize2}); + /// Fill data into containers // - Input: register vocabulary and tokenize the input string. // - Output: register vocabulary. // - Parameters: load parameters from the `arg0` file into the container. 
tokenizeInput(vocabDir, inputContainer); outputContainer.loadVocab(vocabDir); + + MemRef paramsContainer0({param_size_group[0]}); loadParameters(paramsDirs[0], paramsContainer0); - for(int i = 0; i < 32; i++){ - loadParameters(paramsDirs[i+1], paramsContainers[i]); + int params_count = 1; + for (int i = 1; i < 193; i++) { + for (int j = 0; j < split_group[i]; j++) { + if (param_size_group[i] > 0) { + MemRef paramsContainer1({param_size_group[i]}); + loadParameters(paramsDirs[params_count], paramsContainer1); + paramsContainers.push_back(paramsContainer1); + } + params_count++; + } } - loadParameters(paramsDirs[33], paramsContainer2); + MemRef paramsContainer2({param_size_group[193]}); + loadParameters(paramsDirs[params_count], paramsContainer2); + /// Run LLaMA Inference // - Perform the forward function. // - Find and append the generated token. @@ -210,16 +250,58 @@ int main() { for (int i = 0; i < generateLen; i++) { const auto inferenceStart = std::chrono::high_resolution_clock::now(); // Execute the forward pass of the model. 
- _mlir_ciface_forward0(resultContainerPtr, ¶msContainer0, &inputContainer); + + _mlir_ciface_forward0(resultContainerPtr, ¶msContainer0, + &inputContainer); resultContainer0 = resultContainerPtr->memRef3D0; auto resultContainer1 = resultContainerPtr->memRef2D; auto resultContainer2 = resultContainerPtr->memRef3D1; auto resultContainer3 = resultContainerPtr->memRef3D2; - _mlir_ciface_forward1(&resultContainer0, ¶msContainers[0], &resultContainer0, &resultContainer2, &resultContainer3, &resultContainer1); - for(int m = 1; m < 32; m++){ - _mlir_ciface_forward1(&resultContainer0, ¶msContainers[m], &resultContainer0, &resultContainer2, &resultContainer3, &resultContainer1); + resultContainer0.splitMemRef(std::move(resultContainer0), + subResultContainer0, subResultContainer1, 1, + 20); + for (int m = 0; m < 32; m++) { + _mlir_ciface_forward1(&sub3DContainer0, ¶msContainers[m * 6], + &subResultContainer0); + _mlir_ciface_forward1(&sub3DContainer1, ¶msContainers[m * 6], + &subResultContainer1); + tmp3DContainer.concatenateMemRefs(sub3DContainer0, sub3DContainer1, + tmp3DContainer, 1); + _mlir_ciface_forward2(&tmp2DContainer0, ¶msContainers[m * 6 + 1], + &tmp3DContainer, &resultContainer2, + &resultContainer3, &resultContainer1); + _mlir_ciface_forward2(&tmp2DContainer1, ¶msContainers[m * 6 + 2], + &tmp3DContainer, &resultContainer2, + &resultContainer3, &resultContainer1); + tmp2DContainer0.addMemRef(tmp2DContainer0, tmp2DContainer1); + tmp2DContainer0.splitMemRef(std::move(tmp2DContainer0), sub2DContainer0, + sub2DContainer1, 0, 20); + _mlir_ciface_forward3(&subResultContainer0, &sub2DContainer0, + &subResultContainer0); + _mlir_ciface_forward3(&subResultContainer1, &sub2DContainer1, + &subResultContainer1); + _mlir_ciface_forward1(&sub3DContainer0, ¶msContainers[m * 6 + 3], + &subResultContainer0); + _mlir_ciface_forward1(&sub3DContainer1, ¶msContainers[m * 6 + 3], + &subResultContainer1); + tmp3DContainer.concatenateMemRefs(sub3DContainer0, sub3DContainer1, + 
tmp3DContainer, 1); + _mlir_ciface_forward5(&tmp2DContainer0, ¶msContainers[m * 6 + 4], + &tmp3DContainer); + _mlir_ciface_forward5(&tmp2DContainer1, ¶msContainers[m * 6 + 5], + &tmp3DContainer); + tmp2DContainer0.addMemRef(tmp2DContainer0, tmp2DContainer1); + tmp2DContainer0.splitMemRef(std::move(tmp2DContainer0), sub2DContainer0, + sub2DContainer1, 0, 20); + _mlir_ciface_forward3(&subResultContainer0, &sub2DContainer0, + &subResultContainer0); + _mlir_ciface_forward3(&subResultContainer1, &sub2DContainer1, + &subResultContainer1); } - _mlir_ciface_forward33(&resultContainer0, ¶msContainer2, &resultContainer0); + tmp3DContainer.concatenateMemRefs(subResultContainer0, subResultContainer1, + tmp3DContainer, 1); + _mlir_ciface_forward193(&resultContainer0, ¶msContainer2, + &tmp3DContainer); const auto inferenceEnd = std::chrono::high_resolution_clock::now(); const std::chrono::duration inferenceTime = @@ -243,12 +325,11 @@ int main() { // Append the generated token into the input and output container. 
inputContainer.appendTokenIdx(maxIndex); outputContainer.appendTokenIdx(maxIndex); - + free(myMemRef1.release()); free(myMemRef2.release()); free(myMemRef3.release()); free(myMemRef4.release()); - free(resultContainer0.release()); } /// Print the final result diff --git a/examples/LlamaTest/vocab.txt b/examples/LlamaTest/vocab.txt new file mode 100644 index 00000000..eb90f73b --- /dev/null +++ b/examples/LlamaTest/vocab.txt @@ -0,0 +1,32000 @@ + + + +<0x00> +<0x01> +<0x02> +<0x03> +<0x04> +<0x05> +<0x06> +<0x07> +<0x08> +<0x09> +<0x0A> +<0x0B> +<0x0C> +<0x0D> +<0x0E> +<0x0F> +<0x10> +<0x11> +<0x12> +<0x13> +<0x14> +<0x15> +<0x16> +<0x17> +<0x18> +<0x19> +<0x1A> +<0x1B> +<0x1C> +<0x1D> +<0x1E> +<0x1F> +<0x20> +<0x21> +<0x22> +<0x23> +<0x24> +<0x25> +<0x26> +<0x27> +<0x28> +<0x29> +<0x2A> +<0x2B> +<0x2C> +<0x2D> +<0x2E> +<0x2F> +<0x30> +<0x31> +<0x32> +<0x33> +<0x34> +<0x35> +<0x36> +<0x37> +<0x38> +<0x39> +<0x3A> +<0x3B> +<0x3C> +<0x3D> +<0x3E> +<0x3F> +<0x40> +<0x41> +<0x42> +<0x43> +<0x44> +<0x45> +<0x46> +<0x47> +<0x48> +<0x49> +<0x4A> +<0x4B> +<0x4C> +<0x4D> +<0x4E> +<0x4F> +<0x50> +<0x51> +<0x52> +<0x53> +<0x54> +<0x55> +<0x56> +<0x57> +<0x58> +<0x59> +<0x5A> +<0x5B> +<0x5C> +<0x5D> +<0x5E> +<0x5F> +<0x60> +<0x61> +<0x62> +<0x63> +<0x64> +<0x65> +<0x66> +<0x67> +<0x68> +<0x69> +<0x6A> +<0x6B> +<0x6C> +<0x6D> +<0x6E> +<0x6F> +<0x70> +<0x71> +<0x72> +<0x73> +<0x74> +<0x75> +<0x76> +<0x77> +<0x78> +<0x79> +<0x7A> +<0x7B> +<0x7C> +<0x7D> +<0x7E> +<0x7F> +<0x80> +<0x81> +<0x82> +<0x83> +<0x84> +<0x85> +<0x86> +<0x87> +<0x88> +<0x89> +<0x8A> +<0x8B> +<0x8C> +<0x8D> +<0x8E> +<0x8F> +<0x90> +<0x91> +<0x92> +<0x93> +<0x94> +<0x95> +<0x96> +<0x97> +<0x98> +<0x99> +<0x9A> +<0x9B> +<0x9C> +<0x9D> +<0x9E> +<0x9F> +<0xA0> +<0xA1> +<0xA2> +<0xA3> +<0xA4> +<0xA5> +<0xA6> +<0xA7> +<0xA8> +<0xA9> +<0xAA> +<0xAB> +<0xAC> +<0xAD> +<0xAE> +<0xAF> +<0xB0> +<0xB1> +<0xB2> +<0xB3> +<0xB4> +<0xB5> +<0xB6> +<0xB7> +<0xB8> +<0xB9> +<0xBA> +<0xBB> +<0xBC> +<0xBD> +<0xBE> +<0xBF> +<0xC0> 
+<0xC1> +<0xC2> +<0xC3> +<0xC4> +<0xC5> +<0xC6> +<0xC7> +<0xC8> +<0xC9> +<0xCA> +<0xCB> +<0xCC> +<0xCD> +<0xCE> +<0xCF> +<0xD0> +<0xD1> +<0xD2> +<0xD3> +<0xD4> +<0xD5> +<0xD6> +<0xD7> +<0xD8> +<0xD9> +<0xDA> +<0xDB> +<0xDC> +<0xDD> +<0xDE> +<0xDF> +<0xE0> +<0xE1> +<0xE2> +<0xE3> +<0xE4> +<0xE5> +<0xE6> +<0xE7> +<0xE8> +<0xE9> +<0xEA> +<0xEB> +<0xEC> +<0xED> +<0xEE> +<0xEF> +<0xF0> +<0xF1> +<0xF2> +<0xF3> +<0xF4> +<0xF5> +<0xF6> +<0xF7> +<0xF8> +<0xF9> +<0xFA> +<0xFB> +<0xFC> +<0xFD> +<0xFE> +<0xFF> +▁▁ +▁t +er +in +▁a +en +on +▁th +es +▁▁▁▁ +▁s +▁d +at +or +an +▁c +is +re +it +▁the +ar +le +▁w +▁p +ou +al +▁f +▁m +ed +▁o +▁b +om +ion +ing +ic +as +el +ent +▁in +▁h +nd +et +▁l +▁n +st +▁to +ch +▁I +ro +▁▁▁▁▁▁▁▁ +il +▁of +de +ct +▁( +am +▁C +▁de +▁S +▁u +▁A +▁\ +▁e +▁and +▁T +ol +▁v +im +ot +ad +ut +▁g +em +ur +id +▁* +ig +ra +▁re +▁is +qu +ow +▁M +est +▁y +se +ve +ce +ie +un +▁P +▁B +ag +ul +▁= +he +end +ode +ter +ment +os +▁D +if +ation +▁for +▁r +▁L +▁you +▁be +ly +ver +ab +te +▁it +▁on +ri +us +▁" +▁wh +▁con +▁H +▁st +ir +▁E +▁F +ck +▁an +th +eg +ay +ith +▁R +ist +and +▁that +▁al +▁$ +▁# +od +um +▁W +ht +code +▁G +ate +ess +▁N +ere +pp +▁as +▁se +▁pro +▁with +pe +▁k +ers +pt +); +lo +▁▁▁▁▁ +▁com +ame +▁` +▁Com +ia +ant +▁la +▁{ +▁en +ction +▁ex +ld +ub +▁j +la +ue +▁J +ich +▁do +▁O +▁qu +iv +ort +art +▁un +▁## +▁this +ke +▁ha +▁- +out +▁The +▁not +▁ne +ill +▁le +ci +rom +ine +// +op +egin +▁Comment +▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁ +begin +ст +ass +iz +). 
+og +▁п +▁or +▁was +▁at +our +▁i +ain +▁K +на +▁V +ge +▁su +ap +age +ould +ne +av +xt +ore +ile +-- +▁в +▁by +li +ath +ра +ber +ach +all +▁Th +ult +▁} +▁U +▁us +▁z +ust +▁have +lic +ни +▁can +tr +com +), +▁In +ind +ell +▁from +ов +to +▁[ +able +ost +▁ch +ect +ight +int +▁' +▁are +▁im +▁sh +▁< +▁An +▁с +ata +ire +▁tr +con +ord +ity +ard +▁▁▁▁▁▁ +▁he +▁but +oc +=" +▁pr +ure +per +ack +ork +ong +ans +ко +ple +▁des +ok +orm +wer +ak +pr +ase +▁el +ph +ac +▁und +▁ar +▁if +ud +ps +ite +ble +но +fer +pl +ive +ang +ens +ро +▁so +so +ast +() +swer +ru +ies +▁: +au +ov +ре +го +▁der +▁my +▁we +▁me +nt +▁ad +urn +▁your +:// +are +▁all +ff +io +estion +ime +▁er +lass +▁и +▁which +ome +ont +▁par +▁ma +▁Y +", +▁о +ft +ial +cc +ound +▁li +▁res +eth +ject +▁app +▁St +ice +▁am +act +▁del +gr +ated +ier +▁▁▁▁▁▁▁▁▁▁▁▁ +▁ab +▁et +ally +.. +port +ik +▁per +▁cont +ри +ка +ser +ли +ll +iew +ign +_{ +put +one +unction +▁di +ary +ition +ma +ен +get +▁lo +▁val +▁Q +ran +▁д +ence +▁work +▁на +ip +item +ype +▁& +▁his +▁use +der +▁Answer +▁will +ize +та +low +▁Ch +▁get +ide +ous +ink +ption +ла +turn +ung +ec +ug +form +res +htt +oug +ль +▁no +cl +▁ro +▁one +tt +cri +du +▁up +то +(" +▁ob +we +ory +▁est +ery +iel +str +ob +▁que +ian +▁out +▁pl +▁new +ки +▁+ +ry +oth +ther +▁var +▁would +▁ser +tern +text +▁there +ish +ror +те +▁set +▁@ +▁по +▁te +ex +▁return +ail +▁any +▁It +▁function +{\ +', +és +ale +ан +▁when +ib +▁go +ance +▁had +▁Qu +▁comp +ле +▁з +math +▁has +▁м +▁pre +ener +▁part +elf +▁die +▁like +ray +irst +▁dis +▁man +rit +▁then +▁class +pro +▁po +▁using +eb +▁code +own +▁some +ces +▁$\ +ер +lect +▁au +isch +▁col +▁– +up +ons +▁add +ild +iss +val +ount +les +vent +▁▁▁▁▁▁▁▁▁▁▁▁▁ +▁Z +In +row +ear +ations +ah +que +ublic +ank +▁sp +▁Wh +---- +sk +ew +ags +ти +ann +▁— +ert +ace +sch +▁need +▁à +ien +ough +не +▁def +ij +ern +▁what +▁Ar +wo +ml + +▁fil +name +inal +▁il +ample +▁way +ica +во +cess +itt +uch +▁where +ми +org +https +▁vo +ient +ove +▁value +eng +▁La +^{ +ref +ied +ER +▁stat 
+fig +me +▁von +▁inter +roid +ater +▁their +▁bet +▁ein +}\ +"> +▁sub +▁op +▁don +ty +▁try +▁Pro +▁tra +▁same +ep +▁two +▁name +old +let +▁sim +sp +▁av +bre +blem +ey +▁could +▁cor +▁acc +ays +cre +urr +si +▁const +ues +}$ +View +▁act +▁bo +▁ко +▁som +▁about +land +mer +▁list +cal +▁import +col +▁na +na +:: +▁who +▁error +▁X +ator +ext +▁been +ér +▁run +pos +▁cl +** +▁К +ular +ause +▁reg +▁know +▁see +▁him +ning +▁за +ates +fore +ions +▁hel +ute +▁rem +▁го +▁Mar +ру +vice +irect +ner +▁under +rib +hr +че +▁As +▁end +ember +▁а +▁att +ina +son +▁follow +▁Sch +pect +▁rel +▁So +▁look +abel +▁problem +▁van +strong +co +pon +ca +ada +": +cond +amb +}, +quest +▁aut +▁result +▁may +Re +http +): +▁And +red +▁How +po +ско +att +oup +ced +▁type +▁than +▁cons +uf +ци +▁question +raph +igh +▁М +▁htt +ins +den +▁da +▁ver +oh +▁=> +riv +ude +▁For +▁ra +frac +ма +▁after +}{ +▁method +") +amp +ash +▁rec +▁differ +ON +ax +ament +ource +Con +its +Name +man +▁bec +che +▁En +aj +▁gener +IN +▁id +ages +▁loc +fo +br +▁she +Pro +▁una +▁к +eta +log +olog +▁sur +arg +▁-- +kt +(\ +min +▁line +▁vari +ся +ics +ня +very +add +▁object +Id +▁But +▁case +▁make +▁cal +▁pass +сь +ession +net +." 
+▁г +är +де +no +ating +ato +line +ви +▁Ex +▁ass +▁vers +ля +▁ed +umn +other +ста +ative +String +▁los +wn +▁answer +▁let +▁pe +ents +▁fe +ince +ni +ider +ows +▁test +▁here +roll +▁call +ruct +▁pol +ait +▁back +ho +Ex +ress +ST +ried +date +ет +▁did +ting +▁El +▁dem +)$ +ова +urrent +lace +right +ren +по +▁each +cy +block +data +▁% +▁ac +▁== +ür +▁por +ask +arch +ames +▁Con +ча +▁off +▁find +cont +▁now +work +ational +dd +ción +▁А +ault +List +▁ext +urs +ake +ule +▁point +AT +aut +▁trans +▁co +▁read +▁used +ски +ari +LE +eter +oun +ever +self +ined +idth +ux +js +▁such +▁Is +ée +ful +▁dist +▁bu +itemize +Cont +je +си +▁prov +bb +ward +esent +erson +anks +wh +not +▁We +ka +rop +atur +als +▁bel +ör +fr +▁example +▁incl +amil +▁ра +▁“ +▁string +▁think +Th +▁tem +ave +▁Fran +▁number +▁si +imes +tem +my +ler +load +== +▁hand +za +▁because +▁sch +vo +this +ID +ão +▁start +▁war +▁help +ts +▁char +▁ph +▁min +til +rite +-------- +els +▁mit +edia +ку +▁Sh +any +]; +▁Б +ique +da +ef +dex +▁produ +▁Н +gram +▁Or +▁gre +quote +leg +orn +▁ind +▁post +▁dep +], +vi +▁user +▁> +lick +▁very +ething +▁array +▁gu +▁dur +`. +ть +lication +сти +ek +ico +▁dat +ор +html +ione +▁different +▁check +▁fr +▁Er +▁text +ні +icht +stack +EN +rag +▁every +Ar +▁before +alse +▁fin +▁dé +▁these +▁det +Val +ception +▁android +blockquote +▁je +file +ats +▁до +essage +▁again +aw +Ch +ween +▁Д +for +cial +play +pre +ida +▁Par +ny +ract +▁supp +ased +lection +▁dans +air +rol +▁thr +Data +lich +▁про +▁long +▁second +ually +ines +▁found +ength +yp +ead +▁log +ui +new +▁Р +go +aus +ody +▁son +ме +ero +ved +sub +▁right +view +▁following +') +"); +▁said +же +чи +ту +ott +се +ars +$. 
+gg +▁br +ool +yle +use +▁show +lease +cia +▁direct +doc +ар +ms +▁giv +▁exp +ql +ду +ве +▁Be +Com +iter +RE +mp +men +▁Ro +MA +▁Col +ister +▁well +▁ +ene +▁mon +▁dec +▁still +▁об +▁Tr +▁ф +ife +ism +by +raw +ior +▁med +orld +▁comple +ww +▁art +ron +▁Г +▁My +▁als +rect +▁auf +▁down +ather +Col +Text +back +$, +▁year +мо +pi +▁Gr +ream +▁rep +bf +www +▁wur +▁org +inter +▁Die +▁being +". +label +▁cent +java +bar +ante +ana +__ +▁solution +▁О +▁fl +▁create +ici +ste +ython +unt +ason +ference +SE +▁non +ane +▁ins +ader +_{\ +Res +▁main +пи +▁▁▁▁▁▁▁▁▁▁▁▁▁▁ +▁There +▁pour +RO +`, +lish +bject +ccess +▁orig +▁▁▁ +ischen +ower +▁het +uc +▁else +». +▁от +equ +sible +test +stand +én +ets +GE +ident +▁е +▁при +., +▁das +ock +," +▁vol +▁fo +▁para +▁Т +▁Car +ral +▁Sp +var +▁play +ouse +▁та +ically +▁contain +ponse +▁String +án +▁both +ken +AR +ере +▁Il +▁iss +▁open +▁) +▁What +fe +rivate +reg +▁without +▁zu +vis +flow +▁http +abase +▁word +▁change +▁works +▁ge +▁! +▁een +itle +▁event +word +ando +SB +rem +▁field +ving +Ser +▁our +▁qui +▁oper +▁ist +def +▁made +ние +px +▁men +rm +ais +cent +list +To +▁To +ja +vert +▁mar +value +▁„ +"; +▁aus +▁Br +ole +▁mult +ought +▁mat +▁view +fil +▁со +га +▁void +▁good +бо +CT +▁many +ben +▁во +▁ка +▁system +ino +▁another +▁rest +user +ility +ai +▁might +ustom +▁order +▁Ver +SS +}) +▁eff +до +ett +▁sign +му +IT +string +elle +▁sing +cul +▁trying +▁beg +▁page +хо +▁Can +▁Ser +++ +▁must +▁values +▁key +ible +]. +ird +▁program +roller +▁conne +▁say +▁param +ache +velop +▁select +▁famil +▁last +▁Thanks +▁pop +}. +eq +▁doesn +[' +▁term +▁ré +▁document +па +лу +ateg +.) 
+ling +ional +ables +▁tak +utton +▁arg +type +▁sure +▁real +▁web +▁current +▁Pl +cho +ments +▁Joh +ots +▁exist +ну +▁für +▁из +do +ного +▁las +▁null +▁inform +▁Л +▁version +▁chang +ager +▁Comm +лі +ush +▁Ge +▁high +▁input +ogle +ros +box +gen +▁ste +▁local +Im +▁process +ternal +ized +ги +ét +▁Ind +▁och +lt +▁column +▁tried +▁command +▁best +aster +за +▁prim +▁model +▁і +▁those +ities +ère +▁ре +је +ши +ques +▁Am +▁own +lin +зи +Value +thing +▁, +▁Te +▁stud +▁um +▁server +ille +▁put +ativ +gy +ови +raf +ово +▁wurde +▁When +▁div +ants +▁ter +▁partic +▁т +▁Do +▁No +sert +ido +mathcal +ade +▁II +lear +ograph +ense +▁row +num +▁possible +▁since +▁Bo +ctions +▁Im +OR +ці +▁ide +map +▁correct +ves +php +▁output +▁Ph +AL +ared +\\ +▁image +esch +жи +▁conf +por +query +ures +ium +ends +▁Ab +SBN +ід +ether +ptions +itu +lib +ns +ki +▁working +▁como +▁Then +ML +key +class +ople +ittle +▁match +ways +mathbb +▁require +alt +▁vis +▁bl +▁called +Item +ura +vec +eme +▁della +embre +urg +Se +▁request +ische +▁port +▁instead +=\ +▁У +hor +ente +ume +erd +са +▁why +rist +▁person +▁... +▁private +▁tot +pha +ift +ita +loc +▁old +он +▁nel +'] +ti +iet +cite +plement +▁above +ks +ready +▁come +section +▁Pol +▁writ +▁https +▁$$ +▁» +▁build +ito +▁consider +aft +App +,\ +indows +comm +▁; +ground +▁place +By +▁project +Object +▁repr +ences +indow +zt +▁files +cz +ivity +▁init +▁prob +▁sk +orth +iment +ouble +atal +irc +▁è +▁bre +ista +input +▁И +ной +sum +path +▁cour +▁too +▁Ad +▁Gu +▁false +▁fun +▁ст +ood +ès +▁enc +bol +rl +arget +order +▁mean +пе +igen +▁пре +width +; +itor +▁state +▁great +enn +bin +Er +Mod +oz +▁won +▁fact +▁java +▁Univers +▁cap +istor +}( +ku +ither +ales +▁ou +ross +▁take +rix +lob +▁eine +ases +▁access +ité +istr +ization +▁appro +ball +▁mak +}^ +▁Cons +press +serv +(). 
+af +▁ref +)\ +▁contin +su +iver +▁cond +▁expect +▁charact +bert +elt +ters +script +▁Ed +apt +'); +print +▁size +▁sich +face +enden +▁Amer +ified +ów +▁Su +tes +med +▁Reg +sole +▁includ +ini +inci +▁pla +▁left +df +Par +▁All +▁occ +▁At +▁cr +Qu +▁given +▁System +ican +▁final +itions +▁бы +▁perform +AN +▁Me +uro +▁That +гра +▁По +▁ви +ably +▁present +duct +ric +▁Eng +try +▁lar +bl +idd +▁är +ora +LL +oss +▁ISBN +▁three +jo +ní +rc +▁far +▁Not +▁little +dis +ati +function +▁able +less +со +▁path +▁pres +lose +PI +▁issue +ackage +time +ige +ams +▁Cl +ails +alk +ii +ше +pen +QL +▁eas +RL +cel +▁sl +▁ask +▁nom +▁top +ides +index +ém +▁happ +ox +cd +▁better +▁load +ados +zen +▁ce +▁fa +▁John +IMA +▁Bar +overflow +▁де +ness +cer +▁Here +ret +▁sz +ambda +opy +url +py +rt +▁understand +ał +her +## +▁child +▁exec +▁application +▁struct +▁я +File +▁cert +ison +▁variable +DE +rs +▁really +Port +ba +▁Ber +▁inte +▁static +▁config +▁She +estions +▁plus +▁hab +ope +▁mus +▁count +ME +▁support +▁people +▁beh +▁already +Tr +▁done +dem +size +alpha +▁disc +]) +▁Man +▁mil +▁stand +▁group +▁small +▁mag +сть +▁default +▁single +link +clude +▁ear +ilar +**** +▁fix +ley +▁pas +ний +ission +▁implement +itch +▁года +▁always +▁Jah +pring +ção +plate +▁descri +▁head +init +ograf +▁query +ived +▁ing +pty +ha +▁mov +▁э +ette +ily +▁got +iled +icro +▁wr +ря +▁never +ores +▁bas +ios +lack +aint +vious +▁give +idad +En +ный +table +▁На +▁pat +тор +angu +loy +▁seg +array +▁Fl +▁index +▁sw +IMAGE +▁km +би +Class +ena +мен +comp +atus +rap +▁List +Error +▁typ +▁ма +cs +': +ji +▁However +▁те +▁below +▁App +ще +}_ +bum +vir +ées +▁record +tain +lem +ital +▁imp +ego +▁od +▁rece +mit +ffic +stackoverflow +ieve +▁З +▁nov +це +▁Intern +bu +▁sugg +▁loop +ride +▁$( +▁super +rid +ных +▁Per +▁dom +=' +utsch +len +▁write +▁inv +outh +▁Her +▁years +▁original +ega +▁Ste +▁seems +ég +▁next +eder +▁Ne +avas +ification +Exception +▁Der +▁ve +atic +hat +brary +return +urch +ision +mi +oint +▁day +iction +ál +▁és 
+▁though +action +ít +ungen +ours +▁script +▁information +▁multi +▁\\ +ster +ке +AC +cies +▁display +oman +Time +ius +)); +tre +▁lim +ately +éd +iste +▁са +post +uel +img +▁ч +ска +eld +pper +ula +▁general +Al +Form +▁upon +zo +amente +▁prom +▁ü +lex +▁turn +▁ме +ention +лен +▁af +icle +ств +▁Fil +▁Ф +avascript +Man +ara +ware +align +angle +▁Sc +unic +▁fran +Un +zi +met +Add +▁pub +ков +▁gen +▁pod +▁sum +▁having +▁avec +sl +▁fig +▁Res +Date +ules +with +ский +gu +ET +▁bro +rie +aps +ending +mail +ook +▁success +berg +▁deb +elta +()` +ential +frame +Key +inn +▁simple +ival +▁care +▁Web +"). +> +ko +▁exper +▁separ +yl +ourn +▁dev +▁auch +▁block +book +▁map +illa +▁comput +▁space +result +)} +▁echo +config +hi +▁large +▁width +▁Go +mat +▁diff +▁kind +ances +ynam +▁color +Int +sol +▁pi +▁character +oment +▁response +igma +wards +arrow +су +ties +▁über +Image +yd +▁пере +▁node +▁item +achine +ima +▁va +▁approach +▁wer +▁че +On +ollow +она +cted +ured +Controller +lied +▁jo +▁dal +unk +▁î +start +ola +▁compon +IC +bit +▁base +пу +▁idea +▁dire +▁rad +group +▁With +server +side +sing +▁dies +▁near +▁voor +▁argument +▁}, +▁land +▁names +▁option +ithub +pped +aug +▁links +▁full +▁situ +▁console +▁etc +aux +▁Cor +icrosoft +▁came +local +▁known +▁multiple +anguage +▁total +ology +ät +▁Х +▁fre +▁ten +ideo +▁bes +true +Query +omm +▁Art +▁keep +▁University +reate +pport +▁python +tra +ector +рі +oph +▁conc +▁four +viron +▁via +?" +image +oll +ные +▁context +▁sem +._ +▁eng +mar +AD +▁mor +▁Cal +▁cell +imal +ATE +▁inf +ön +uffer +sq +.... 
+▁zur +With +ран +chn +▁door +content +▁miss +▁simp +ár +ira +▁hat +Test +▁certain +NS +▁cho +▁adv +where +▁looking +▁times +них +uto +▁É +can +host +▁(* +loat +▁nicht +Field +burg +const +ades +▁Mus +▁nothing +▁incre +▁Min +▁power +▁American +ln +valid +ungs +▁National +▁San +▁York +Request +char +▁Ze +button +▁alg +SON +▁ap +uff +ability +ем +▁anything +ela +()) +ба +ampion +▁pot +▁fut +ailable +▁prop +"] +▁less +lag +▁August +It +▁please +▁style +▁Also +bt +▁probably +▁One +▁poss +UI +uit +▁West +hn ++\ +Button +json +err +rame +dom +ilon +alf +▁client +▁continu +xml +pec +ador +ls +▁however +▁Any +änd +mathrm +▁url +▁book +▁gl +ives +gi +▁tro +▁US +point +open +▁cur +▁era +▁particular +▁HT +oot +ello +lobal +▁action +▁Int +▁include +▁elements +ная +ards +▁Bl +▁hum +from +change +▁functions +hen +Service +▁height +▁Land +ias +gs +ión +лов +node +.” +hand +▁бу +▁amb +▁Lu +▁throw +▁mot +▁Act +▁world +_\ +base +▁Co +▁arch +▁#### +ged +pril +older +Model +▁several +lie +check +]{ +cons +▁Tra +heck +▁least +down +ebru +Def +param +ischer +▁cas +CH +▁address +▁раз +ufen +urope +ей +▁bound +CO +▁Ang +▁Ma +Index +core +ouch +atabase +ribution +document +Le +}_{ +vern +▁statement +▁Brit +ono +psilon +▁level +▁product +IS +▁course +▁Mr +> +▁background +▁ret +ering +most +сько +▁thread +itional +ites +Pl +▁dos +ga +day +▁Gener +▁tw +Ad +">< +▁($ +▁moment +title +create +version +Manager +▁fur +pping +ijn +ос +▁rather +ptember +OS +▁site +▁caus +ani +▁home +мі +▁short +pa +▁lead +ished +cing +ording +▁prote +сле +LECT +▁didn +position +"," +(), +trans +▁lot +▁од +AS +▁sat +▁points +github +style +▁году +▁Dis +ponent +omet +zer +ULL +▁pa +AP +aces +▁United +ama +ety +Color +▁enough +US +▁length +()); +^{\ +fty +Box +apter +▁complet +ник +max +object +({ +imgur +itive +unch +▁Sub +ende +гу +ategory +ты +iano +▁upd +▁Aust +}{\ +top +las +pis +iness +▁{ +▁Е +Gr +▁AS +▁ве +thers +▁defined +azione +▁offic +▁autom +ün +▁brow +▁serv +▁remove +iro +▁Bibli +ED +▁whole +▁ш +▁Java 
+▁zum +ua +pm +dev +кра +olds +▁War +än +pass +uz +[" +▁tri +ised +ха +▁memory +▁Port +oper +Up +▁Thank +▁Mich +ych +board +бу +Inst +▁begin +ination +▁Mod +_, +▁Den +option +▁construct +▁Just +Map +run +▁respect +ham +ман +imedia +▁apply +cription +main +▁Ка +oid +Code +}; +Info +▁format +Log +▁су +▁lat +utor +▁reference +▁calcul +onn +Lo +infty +▁along +▁č +▁task +▁ev +theta +ras +jor +▁бо +▁princip +My +▁einer +▁Es +omb +quad +^{- +ump +▁till +ді +▁looks +▁ok +ца +nu +Fil +▁sont +▁Med +ague +▁cost +▁Sim +▁comment +▁(\ +egen +▁parameter +▁France +rep +▁TH +▁yet +▁away +▁circ +▁API +emp +ві +Layout +▁lines +▁Part +empt +▁Bi +▁mind +ky +ging +▁report +▁Add +род +▁range +cias +lip +▁Kar +▁Commons +gerufen +aff +sec +▁html +lig +▁window +inition +cis +▁ut +eln +▁aux +▁neg +Hand +▁); +▁anal +▁fri +▁си +etch +md +page +▁library +▁:= +ROM +You +space +▁durch +▁host +aven +▁File +alle +тив +▁pap +ство +mark +▁mais +erman +Size +ек +▁Ма +▁isn +▁copy +sten +river +▁went +▁javascript +▁sam +▁frame +▁vi +▁previous +rodu +▁methods +▁necess +NA +cket +▁opt +Loc +how +▁în +ship +▁itself +▁Please +iene +вер +▁<< +▁mill +▁trad +pace +▁Har +iten +wise +write +ции +ры +Line +olo +▁accept +height +▁elect +ella +▁på +Select +▁ли +▁\< +(( +▁ID +ops +ван +ió +TP +», +nection +parent +▁Mag +Table +Over +▁network +спо +▁assign +igger +irm +)` +ottom +beta +▁dell +▁body +▁да +▁Your +▁fue +▁package +▁light +▁** +MP +▁cou +yes +:\ +▁Ч +▁mention +ensch +▁deg +▁convert +▁Dav +adt +Result +though +▁bus +xy +▁seen +All +public +ively +▁Rec +▁His +sim +▁för +▁histor +▁sett +rat +abled +▁», +google +Web +él +▁title +▁Janu +ја +▁took +iden +sz +▁Get +▁objects +▁common +▁changes +▁Lond +▁extern +▁ju +Is +▁available +tri +▁más +osa +Be +▁Data +ural +▁hom +▁account +oo +▁perm +respond +yt +▁send +▁returns +ivid +▁expla +ín +▁nor +If +▁From +▁target +fect +ент +▁uit +▁Jo +▁variables +▁series +▁func +▁himself +▁ча +anti +▁ach +ialog +▁std +ae +▁foot +▁unter +gress +Not +rad +fér +▁util +orem +▁sou +opt 
+▁og +▁uma +itar +▁Ok +ück +sqrt +▁ant +▁werden +år +}); +▁Paris +▁exception +▁determ +▁Vol +▁Sam +▁ess +lies +ioni +oding +idget +▁pri +▁whether +▁под +▁numbers +▁~ +event +▁shows +atures +▁house +▁face +▁się +vironment +van +▁including +▁<- +times +now +▁pur +ifier +▁emp +▁cla +mon +▁Das +ady +▁від +▁ц +abor +OST +▁band +▁ú +▁exactly +iert +avig +▁redu +▁SE +lished +Bu +Message +cell +fully +▁sv +▁makes +pol +▁required +ferrer +▁pers +▁mi +FI +▁Paul +▁UI +▁Bel +inc +▁contains +Out +asure +pu +oto +▁game +zn +▁Why +orith +big +кий +sigma +▁quite +▁jed +rec +▁SQL +бе +▁Mart +ya +▁school +▁simply +▁vor +▁double +рав +▁Str +iem +▁album +▁resol +▁dei +▁Wik +▁aw +umb +ols +▁*/ +▁ze +▁anim +/> +ris +resh +No +iques +current +▁period +▁April +▁store +',' +▁Set +={ +ached +▁Mal +▁Pal +antes +aterial +▁worked +leq +oreferrer +▁happen +▁box +ney +▁close +▁gran +▁lie +▁ir +▁expected +▁для +click +și +▁parte +ogn +▁Form +▁memb +▁plan +▁team +][ +▁commun +orry +ency +gl +inary +cdot +^\ +▁First +ander +▁Dec +request +ства +▁structure +▁|| +▁Comp +actory +▁Mil +▁Some +Stream +▁assum +uen +▁words +▁September +▁Ко +▁days +ories +став +sm +vin +partial +▁parent +oj +нии +!" 
+ugin +▁Windows +Ed +:} +▁q +▁ben +iana +▁label +state +uted +▁() +▁сво +▁edit +uring +▁NS +▁Jahr +▁provide +He +▁Yes +anel +ename +▁Don +isk +gra +elij +▁root +*/ +▁Fre +▁Mor +used +range +▁tamb +▁module +▁directory +ounds +Activity +▁mu +info +▁free +orge +tab +)= +lang +▁ос +▁FROM +▁enter +▁became +idae +хи +▁States +verse +▁expl +ynt +UN +ee +endent +▁making +▁"$ +uni +quence +▁lui +HT +▁uses +zie +nia +Content +▁Count +▁standard +ENT +▁кон +fort +adas +зу +System +▁Sw +▁ever +LO +▁correspond +▁Po +argin +кт +ій +▁remain +cio +▁actual +сту +▁sind +▁Pe +▁changed +▁Note +skie +▁family +ità +cos +txt +ker +ceed +▁arr +▁cam +izer +▁Dan +hel +icult +HP +iler +▁Sal +▁connection +usion +kn +RI +▁vom +Listener +▁ö +▁dim +▁press +▁esc +▁Try +atalog +▁thanks +DO +▁written +dir +rew +▁fire +▁Nach +▁á +enc +▁origin +▁November +▁}; +Count +▁За +▁graph +▁mis +▁External +▁▁▁▁▁▁▁▁▁ +▁options +▁URL +▁php +▁integr +Config +▁Text +inner +▁crit +,” +▁tog +$$ +nof +▁ses +ühr +▁Since +Des +ube +▁section +▁gi +ford +▁Ass +ainer +ttp +▁behav +ports +draw +This +ranch +inding +▁estab +▁obtain +rich +licit +ев +▁qual +▁za +▁har +▁fac +aar +jet +icles +▁Aus +▁hor +▁remov +▁wie +Client +▁natur +hip +Sub +▁random +DF +▁area +tag +Pr +▁Ital +▁roku +nofollow +*} +▁others +▁limit +▁sil +▁sav +▁often +▁render +DB +▁Mc +▁zijn +жен +▁tag +ming +lichen +pack +▁Ag +▁sense +pg +Method +aged +ág +ła +▁interest +▁associ +volution +▁empty +iche +▁gro +▁types +▁Sie +Inter +▁noreferrer +▁gives +hal +▁save +▁font +ruction +Script +▁alla +▁says +▁fu +ape +▁language +iger +▁King +bor +uv +▁shall +▁Europe +▁einem +▁water +▁govern +anz +ators +▁month +ye +▁important +atz +first +▁Trans +▁Mad +▁bra +ika +▁Saint +oria +kre +ements +▁Ben +lav +▁admin +▁Hen +ril +▁Sm +cat +▁Refer +▁Ш +▁pract +▁Pat +▁Gre +▁young +▁Inter +oma +teger +ibility +▁parameters +▁everything +dat +urop +olean +▁returned +▁Class +acy +#### +▁př +▁folder +▁kon +▁guess +gt +jen +annel +icon +▁comb +rict +▁hij +▁author +see +here +stra 
+▁entire +▁directly +raft +heet +ester +▁ми +▁mass +untu +▁users +chi +PE +▁component +Click +Att +▁sobre +ands +▁Hol +▁Sant +ori +▁sua +std +entic +CC +▁filter +SQL +▁God +At +▁му +▁performance +delta +ande +amer +ды +▁cult +▁Nor +but +▁lik +******** +ствен +▁comme +▁dr +imer +ordin +▁condition +este +([ +FF +ться +imo +rab +іль +▁half +each +Dis +▁rows +▁hon +▁together +▁și +medi +agn +alled +▁vill +ING +idden +▁draw +yntax +▁attempt +URL +pose +▁indic +ника +▁English +▁déc +▁needs +▁normal +urt +▁но +}}\ +last +▁Fin +▁Febru +ila +▁country +▁fields +▁max +lés +owie +▁deux +▁built +▁Main +▁camp +ivo +iva +icy +zione +Node +▁:) +▁among +▁Ob +▁cases +haps +sers +arter +ści +▁iter +▁named +exec +▁season +tot +=> +graph +▁nil +acional +▁NULL +▁special +сте +css +▁\( +vs +ael +▁city +ova +▁article +▁South +Action +ça +spring +itude +▁complex +▁что +build +gamma +▁Ent +iers +'. +car +apache +ingen +Input +:  +▁dynam +alls +show +|\ +▁wird +Bar +alth +model +Trans +Row +abe +▁lib +null +ragment +▁State +▁law +Frame +▁Lo +geb +}$. 
+▁needed +▁contr +aries +▁screen +yr +mm +▁shown +▁bad +▁cast +▁Test +▁Auf +▁quant +iga +▁ren +▁Mac +▁transform +▁difference +▁tit +TE +▁step +▁capt +▁collection +ictionary +▁Tom +rier +▁move +cope +ords +▁further +▁columns +▁Lin +▁fixed +▁children +MS +mo +una +▁individ +tty +aste +src +match +wi +▁х +▁ди +▁ord +iving +▁Bro +▁almost +▁Pres +reci +aring +▁/// +ется +▁sig +light +▁Red +▁suggest +olf +▁été +isation +зна +New +стан +LA +unicip +▁figure +mt +iale +▁catch +default +▁tele +▁matter +cast +▁Rich +▁handle +valu +$- +об +▁json +Create +▁exam +аль +ют +ored +idos +append +▁Array +кс +}[ +rive +▁club +mann +▁este +esta +▁Gi +▁Jap +▁Name +Column +oups +ismo +▁City +▁classes +▁infl +hl +ром +▁adding +▁fail +xx +ões +Sc +util +▁location +lege +ago +▁properties +abil +vas +}$, +itted +ód +▁Dem +▁asked +▁tab +Source +▁errors +ographie +▁жи +▁mal +stract +▁dro +rak +▁note +▁setting +▁fem +▁saw +iar +HER +ес +▁pred +▁Out +▁items +лан +▁werd +ersion +lia +▁sin +ichte +▁feel +▁пра +▁oder +UE +ocument +▁mode +▁Na +ден +mes +framework +▁auto +ным +uby +▁template +▁mess +ieder +▁related +oken +▁follows +search +ami +▁wait +igr +▁low +ских +ская +▁Mark +▁ill +amento +\< +▁df +osition +▁Ви +isf +▁Deutsch +ahl +war +itect +▁sal +elen +ById +▁gru +sv +▁passed +▁añ +Sch +▁solve +weise +atos +▁meg +▁member +ername +▁connect +ips +▁round +▁] +nes +▁dir +▁London +dy +FA +▁received +reet +▁Log +▁School +ango +▁These +▁Mont +▁ener +lad +▁define +sign +▁cle +figure +▁View +textbf +$\ +зы +number +▁din +eller +orithm +false +fol +fficient +▁HTML +liche +▁Mo +▁introdu +exp +▁strong +▁thus +/) +▁ele +▁так +▁па +▁dont +▁cause +Number +▁images +▁sample +▁sci +like +▁Lou +div +anc +▁front +nen +▁missing +aria +pres +▁пред +DI +filter +▁Mit +UR +▁opp +▁sql +▁року +eren +emat +ís +▁Jean +éc +▁ci +enne +atform +▁taken +▁Of +▁насе +▁err +OP +From +Default +▁General +wiki +▁grand +▁einen +Reg +Handler +conom +anger +▁был +▁Los +▁expression +ша +yal +▁$(' +▁switch +▁vector +▁Thom +▁virt +leased 
+▁cover +▁resp +ako +rench +ota +Cell +anged +▁+= +lac +ska +next +▁International +▁Wil +▁ont +ibr +ustr +▁black +▁selected +cher +▁liter +root +лся +▁Life +▁insert +▁matrix +ises +)] +▁pel +Override +rypt +▁former +▁Film +▁North +client +▁night +ходи +▁Austral +▁Ret +rho +▁пер +ipedia +▁express +▁third +▁major +▁grad +owe +▁believe +ournal +▁status +unc +▁dou +▁JSON +uis +▁population +enz +▁William +sf +▁Object +▁cin +▁Di +curity +▁Open +▁ле +lar +adding +▁kom +}(\ +▁kil +umer +"/> +▁feature +▁Are +cks +▁Internet +▁ih +▁started +▁early +▁began +TH +python +asp +▁Fr +▁clos +istic +▁music +▁dig +▁ital +▁David +▁website +▁controller +▁Mer +context +product +osp +▁▁▁▁▁▁▁ +▁jun +rown +▁Az +":" +▁aan +▁Date +mult +▁browser +ред +which +RA +quare +▁Russ +▁soon +▁Pre +tau +▁week +▁ба +▁oct +▁town +roy +▁els +blic +undle +▁Histor +▁foi +▁models +зо +onym +Param +▁Met +gener +ją +▁espe +CE +▁device +ellow +▁debug +érie +using +анг +▁*) +udi +▁Miss +ком +posed +▁zwe +ін +▁Robert +▁Oct +lop +jar +▁aver +▁habit +▁:: +äng +Start +▁pow +▁src +▁pattern +▁Э +▁bi +otes +▁__ +▁sens +▁avoid +example +utt +Label +tex +boot +esto +▁March +▁easy +icture +Group +▁father +▁updated +▁Vo +▁III +omega +▁alle +Rec +yg +зе +▁Dim +nect +▁Tor +▁deutsch +▁white +▁national +ppe +▁air +▁password +det +▁big +▁Use +call +▁extra +We +ania +▁hold +Control +▁CO +▁мі +iti +▁Ke +enu +▁Park +том +▁auth +▁center +Ph +тов +iding +▁across +▁song +▁phys +▁numer +ща +▁Alex +▁problems +▁Error +format +▁Acc +▁six +▁db +▁Cast +oms +project +▁vert +cret +▁header +▁stream +ids +▁tor +▁sept +▁estim +▁decl +▁gave +▁player +ysis +▁дру +amm +що +▁(" +▁ax +Property +usr +▁someone +▁impro +aden +rote +▁Ми +ih +++) +▁video +▁exists +кла +▁complete +▁session +▁constant +icos +▁pack +rome +egr +Application +▁yes +▁elle +▁email +orf +case +▁pointer +▁regard +sen +status +▁mes +▁delle +ington +▁Bas +)^ +develop +▁force +▁characters +▁cross +▁death +▁takes +éri +igne +чен +UP +.: +Thread +ju +iny +▁details +▁xml +tait +output 
+message +'' +▁British +ville +▁Div +▁User +cm +чно +column +eqref +ór +onom +▁Post +ellen +Ab +ulté +▁perfect +(){ +vision +active +lier +rij +sd +▁kö +▁nie +▁relig +▁ot +▁machine +▁held +)$. +======== +cker +вы +born +▁past +рия +▁Dr +▁regular +▁provided +TER +▁univers +▁gets +▁nu +▁/* +ober +fin +▁nella +▁become +▁`` +▁history +▁Sol +▁Rad +▁terms +▁events +lymp +))) +рова +▁absol +▁soft +links +▁hope +▁subject +"), +▁creating +▁} +▁Sk +▁flow +▁Ра +▁assert +zet +▁Frank +sa +▁distribution +cu +band +izz +▁job +iner +struct +ák +TO +auf +▁extends +▁Gra +display +▁signific +oney +source +microsoft +inder +▁quick +▁wonder +Instance +elles +ème +▁company +uß +.} +▁separate +UM +HERE +▁writing +itution +▁Gesch +мя +▁James +▁DE +▁Spe +process +Str +▁sym +▁ao +▁wy +▁anyone +▁Up +useum +aron +▁definition +▁`$ +▁fav +ributes +▁Ré +ografia +element +cap +pat +▁Bra +)( +▁according +ге +▁pie +eli +}" +▁activ +▁stop +patch +ті +▁Jose +End +▁prze +▁age +itory +▁PHP +agement +▁`. +▁pretty +▁recomm +▁sud +▁requ +▁обла +atives +▁High +áz +oul +rest +▁Ter +under +thern +center +▁ur +lat +▁interface +▁ин +▁whose +icas +amen +Filter +▁station +Page +▁arm +▁eyes +▁рай +▁seu +oli +win +lik +gex +chan +idence +args +aking +▁Google +▁Stud +▁ho +торы +Su +▁automat +ême +▁cy +lor +▁stack +▁SELECT +AF +▁>> +▁compet +▁pair +▁inglés +Response +▁Fig +grad +▁documentation +▁cant +▁appreci +ån +▁learn +▁indep +▁pal +package +ares +▁Berlin +бли +reich +ён +▁satisf +▁region +▁friend +▁George +▁Во +▁"" +▁desde +Factory +▁County +ouv +▁‘ +▁installed +▁wanted +▁Python +▁interpre +▁included +▁(( +▁altern +isto +gn +▁border +pdf +▁dup +▁download +just +▁members +child +▁pay +▁cer +▁looked +▁correctly +auth +▁стан +▁esp +▁desc +eben +▁questions +mal +▁abgerufen +▁Band +▁[] +Base +▁ris +▁fort +▁Id +▁various +▁League +▁Hand +▁Type +irl +▁Fe +ién +itter +▁fast +sta +▁except +icz +▁French +▁environment +▁conse +ур +ого +▁necessary +target +▁reading +home +zeich +▁equal +▁più +▁prem +▁difficult +▁unit 
+▁replace +▁heart +▁talk +AM +▁RE +▁Person +endency +▁imm +▁human +dn +▁Kir +▁Aut +known +▁frequ +system +лав +▁Sz +▁Gal +ное +selves +rightarrow +▁Са +="@ +▁building +import +▁fam +▁delete +aire +mary +▁fund +▁particip +▁syn +sin +▁lower +▁zero +▁sec +▁fra +Point +▁failed +iento +cup +▁slow +▁nation +ähr +▁info +▁Public +▁decla +▁Та +▁sold +▁Rem +▁Phil +стра +▁mehr +▁Work +▁Nord +▁fait +▁gew +println +obile +▁Kon +▁assume +lands +▁amount +▁Press +ých +▁maxim +▁Champion +library +añ +▁Wal +Comm +]] +▁zw +▁social +LI +▁Unter +vor +Delta +email +raint +oni +▁alt +▁né +ция +ography +▁mentioned +▁<= +▁cette +▁currently +vare +izing +▁Def +icol +ünd +▁configuration +estig +III +lam +ière +▁Ear +▁tu +Ent +▁Using +▁ком +cie +▁proof +▁invol +▁History +>< +▁AND +avy +▁relations +${ +▁comes +▁direction +▁June +▁Way +Component +ech +▁Peter +sg +▁stra +uct +▁implementation +attle +▁cz +plot +▁played +">( +▁ground +unn +rod +spe +ursor +▁leave +erk +▁tal +▁bottom +IO +▁popular +igo +▁Time +values +▁Loc +▁Club +▁anche +iał +ії +Omega +▁located +Url +▁Esp +лы +ць +ulate +▁join +aves +vet +lio +remove +▁token +▁optim +▁claim +ological +▁css +▁although +▁priv +▁Ba +ül +entication +▁ven +Server +▁Cong +NET +CON +dt +perties +▁epis +wikipedia +▁engine +▁fer +getElement +▁Cla +ří +▁rom +varepsilon +▁prime +istry +pected +orage +▁touch +▁[' +▁dan +Em +aciones +Can +▁whom +▁behavior +▁strings +▁Europ +▁Rom +circ +▁pun +▁register +buntu +rain +Ob +TA +▁sometimes +▁ment +▁integer +▁Jac +legate +othing +▁sound +laces +▁Ба +rb +di +ления +▁themselves +▁Black +▁settings +▁norm +▁runs +▁NOT +KE +▁perhaps +▁Я +▁mol +▁ans +atre +▁Dies +Token +anie +▁allowed +Range +▁Gro +via +utorial +ensor +estival +); +краї +▁turned +scope +▁bien +=$ +▁extension +atore +▁Ро +▁specify +edu +Datos +▁stored +▁parse +▁answers +ills +▁heard +lu +▁THE +▁gén +▁ful +ez +▁Prem +then +dp +ського +▁Si +ço +Edit +ків +▁Ли +▁Sing +▁categ +Equ +▁guer +Width +▁Christian +stat +Write +▁woman +wood +Vis +раз +▁$$\ +oder 
+▁bool +▁international +ность +▁Richard +▁addition +▁Music +▁aber +tó +▁hier +ugh +▁pob +▁tables +Do +▁higher +psi +rá +▁active +▁Table +ње +▁description +▁seemed +íst +▁myself +▁menu +del +▁ž +ele +Aut +▁гру +mut +oon +asc +bug +▁moved +CL +▁datas +SO +оло +▁Georg +▁reach +:" +▁evalu +▁Hel +▁River +▁Ар +//// +▁sets +▁Olymp +Adapter +.' +overn +▁Lord +!-- +jpg +imento +▁Prof +▁achieve +}: +▁incor +▁onder +engl +ABLE +▁Mary +▁waren +lage +Dec +англ +encias +лей +▁Machine +▁Ан +uda +▁ś +▁XX +only +ление +▁también +nej +▁relative +▁hours +▁indeed +undo +ingu +area +▁Create +beit +▁removed +master +haus +▁Bern +▁speed +▁Bay +▁Att +▁None +application +üd +▁fit +▁Maria +▁nord +▁split +▁stru +▁official +▁execute +ouve +{{ +▁Ap +▁ку +IL +▁^ +dim +▁setup +ск +▁share +▁minutes +gle +oco +stell +▁Coun +▁temper +keit +ський +ao +▁Long +(& +кан +▁dens +But +XX +DATE +gan +.). +▁entry +install +▁зна +▁Som +Command +ßen +▁starting +▁sto +IG +▁minim +▁explicit +▁bytes +▁party +tober +▁Grand +▁Vor +▁leur +Document +erc +ensive +CP +env +▁arguments +▁Gran +arily +▁lin +tn +(- +geq +▁Famil +▁Бо +▁tour +▁nav +▁properly +▁Mrs +▁Mel +▁scale +astic +ds +▁Sir +▁Church +}^{\ +you +/. +So +▁brought +▁role +▁Sur +▁fond +▁ges +że +eten +▁était +SER +▁которы +▁equation +aspx +▁Afr +▁dit +empty +alement +wrap +▁Bet +▁collect +▁git +▁vie +▁.. 
+рой +▁ +▁Ва +nost +▁nem +▁pen +Open +▁church +кон +▁average +▁comments +▁corresponding +levant +▁bed +▁meaning +Version +Link +bel +▁extract +ść +▁IV +▁Ir +▁computer +▁affect +▁Ста +AX +sort +▁species +▁Oper +▁hash +ches +▁Einzeln +▁keys +▁marzo +▁interpret +hood +▁coordin +ös +rage +etz +iza +дер +üt +^* +▁modify +▁termin +▁cred +zon +ную +▁mie +▁'' +▁Mos +▁connected +NO +▁compile +▁"\ +▁cat +fiddle +uta +Access +▁Sto +▁Bur +▁north +Gamma +▁alloc +Init +▁Link +ialize +Impl +oupe +ropri +▁Gold +▁solo +▁Dist +,- +nav +▁alert +esis +▁Os +/// +▁feb +▁--> +foot +▁Fried +▁Einzelnach +▁rev +zeit +▁Stat +▁Seg +▁blo +wick +EL +caption +header +▁president +▁multip +▁Einzelnachweise +▁seine +?” +Function +▁Stand +▁Function +▁?> +▁Bill +▁spect +▁redirect +rupt +▁walk +вши +springframework +place +ého +Entity +▁Service +inte +▁training +▁(` +фор +▁кра +aur +▁fetch +▁† +▁même +▁(' +atively +▁execut +äch +▁Catalogue +based +Attribute +▁spring +phone +тра +▁пи +тера +▁`\ +▁Od +One +send +bon +▁° +MO +▁asking +▁où +▁ingår +▁testing +▁фа +▁Book +imm +▁progress +bro +First +▁phot +▁ON +Template +developer +annot +▁>= +mission +▁któ +pc +bach +zent +ued +▁ones +ји +▁rout +▁Ки +Post +ції +▁Vir +nek +aging +▁ок +izont +▁agosto +▁choose +▁ +▁systems +loss +iente +▁Cre +▁contra +ums +▁beginning +emy +istics +▁served +Down +options +▁Govern +▁BY +▁jest +té +▁continue +pers +▁easier +▁cos +esso +>> +Net +▁Bor +▁Cr +▁transfer +▁CSS +▁finns +▁хо +username +▁constru +▁pain +▁Tem +▁specified +▁brit +ские +irk +rapper +▁counter +▁[" +oded +дан +property +hard +istrict +)/ +▁Pour +▁Where +▁=== +▁sowie +▁Про +▁dess +▁tras +▁уча +▁Over +note +▁America +cp +▁grande +Me +)- +Mode +▁passing +▁giving +Cl +}/ +Menu +!! 
+angular +▁launch +varphi +▁Johann +▁foreach +ró +sequ +ifi +Am +arp +▁buffer +▁ni +▁mix +▁Museum +▁meant +asi +▁kan +прав +Comp +istoire +iful +jer +issions +Resource +▁воз +▁ST +▁solutions +▁belong +▁Associ +cf +▁Mär +▁grid +Mult +▁requires +kk +▁teach +emeinde +▁square +▁коман +▁Event +▁rules +▁bur +▁eing +▁Mai +▁nam +▁slä +hör +▁tip +▁Literatur +▁scope +overline +▁exit +)? +bet +▁vict +Off +▁approxim +▁Geb +ktop +heit +▁Ю +template +рон +▁uno +Serv +▁framework +operator +▁generally +▁hundred +▁divers +ovi +▁rés +abs +▁gal +çais +▁feet +▁virtual +czy +ску +./ +hu +ancy +▁recommend +▁під +▁money +▁versions +▁helps +▁Hor +Items +look +connect +anges +ViewController +elijk +▁occup +▁editor +auto +ög +▁seconds +▁obvious +vm +akes +▁gegen +▁til +jection +лення +▁operations +▁East +ogy +▁Polit +uten +▁Joseph +"` +▁Company +▁callback +▁sen +cción +▁associated +▁containing +▁practice +elijke +oke +éra +uns +anta +vey +zu +▁Bes +▁Flor +mem +ycz +▁architect +▁anni +▁contact +YPE +▁Cas +▁полу +ovo +▁bring +▁concept +▁js +▁Referencias +emble +▁н +▁supported +Big +▁Hans +erv +▁Maj +▁arriv +▁Have +▁probability +▁Pop +▁Pass +token +Provider +▁Ra +Reader +ooth +lap +▁assist +adow +▁tests +сси +▁king +langle +▁Sum +OIN +▁security +nis +../ +▁basic +unity +`: +▁кото +kow +▁Bibliothèque +asion +alo +ifest +▁novembre +▁peu +▁Ж +enschaft +clus +ју +Height +ún +▁tur +▁ideas +▁ces +frak +▁premier +itation +▁sé +HTML +▁Royal +ської +▁byte +PS +▁segu +inen +▁Great +▁Ку +▁external +Title +Top +Process +ität +▁`/ +▁secret +pository +▁potential +▁Bud +names +asons +stackexchange +background +пер +сов +after +▁pero +▁software +▁sed +▁arrays +tmp +▁asp +scale +▁Lat +anal +▁gem +PU +▁Altri +That +▁Ни +ifact +Address +▁south +▁formula +▁Colleg +▁ін +ktion +▁sac +SH +ajo +etc +vc +`]( +▁Dur +▁Ме +▁Smith +items +CK +elo +▁plugin +▁serie +ienne +▁или +Mar +▁Image +got +andas +▁matches +▁worth +▁Deb +▁cache +▁felt +ersch +izes +Oper +▁Jahre +▁commune +thread +▁ny +dec +ouw +▁surface +▁Por +▁Street 
+при +▁candid +▁Return +▁Kom +gru +▁ти +[\ +▁depends +▁influ +▁towards +ained +▁rank +▁Januar +▁components +gest +getElementById +▁checked +airs +join +▁dead +▁hit +ény +▁equivalent +▁Пре +▁appropri +Pass +▁primer +englisch +▁appar +▁During +▁knowledge +▁trigger +▁core +▁Ol +▁Produ +▁Fern +▁нача +Te +▁Mot +erve +тво +▁mid +▁finally +aires +▁especially +▁tut +▁receive +adre +▁neigh +ktet +ilde +▁radio +▁driver +лись +endencies +▁IE +▁saved +ffect +▁Wayback +iat +▁padding +window +тиче +▁mur +actor +▁Han +ональ +▁gar +▁familjen +ós +▁nationale +▁pré +ded +onal +▁President +▁\, +▁placed +erni +▁signal +nab +hm +Mon +▁vs +SC +▁progetti +▁Ü +▁forms +▁messages +inf +users +GET +▁dels +Collection +▁Good +▁Maybe +▁compr +▁larger +gres +aper +▁При +undes +▁sea +▁Spring +ulo +▁mechan +▁sans +GB +Valid +▁communic +▁pra +vier +▁Се +▁ain +тура +kom +skiego +ково +adata +▁Ре +▁boolean +sets +▁effort +.[ +▁został +PA +▁Vict +SD +ował +▁emb +▁prima +▁hour +subsection +▁Fort +mathfrak +igin +GL +)+ +fi +▁anci +▁pan +\) +▁lug +▁deploy +domain +▁slight +JSON +▁morning +▁hi +▁compare +ije +▁blue +▁Ac +▁middle +anden +▁shared +▁Camp +▁Á +ounded +uw +ierung +Stack +▁eines +▁Da +lij +enti +▁й +Util +▁experience +▁await +uls +▁requests +▁impos +▁constraint +Change +emph +бер +▁Another +Custom +▁significant +cr +▁million +reek +▁dalla +▁Germ +otal +ateur +btn +▁thinking +▁interval +onne +▁liv +(): +▁Ве +oe +▁Ev +meta +▁broad +Rem +apply +▁couple +▁techni +idades +▁goal +▁CD +hab +▁explan +anner +▁Because +blog +includegraphics +▁voice +▁Map +vention +Session +▁Liens +▁sor +category +ashington +▁März +pop +illet +▁zwei +▁Lie +Null +address +▁factor +▁ligne +▁HTTP +▁suf +▁personal +cip +▁Dar +▁adm +кой +▁Ext +▁god +aa +Right +été +▁dynamic +▁maintain +tor +######## +▁Fra +▁choice +▁сто +СР +▁Feder +ston +▁flag +kit +Module +▁спо +▁Stra +icks +▁haven +▁Mass +▁Emp +▁Pi +▁Pen +Rect +▁Kr +itat +eler +ября +itet +▁Start +▁produced +▁пол +(_ +▁delet +▁hot +▁Geschichte +~~ +▁months +▁tod +▁ни +ús 
+temp +▁Dez +ypes +▁cui +ommun +actions +▁eigen +▁immediately +PL +▁Го +▁Bal +ље +ului +▁online +▁años +▁namespace +▁mond +▁Base +▁Canada +etzt +}- +▁defin +▁doubt +▁investig +views +▁Line +▁stage +ettings +ubre +float +▁Play +▁Las +ptr +▁becomes +estamp +▁independent +▁analysis +▁Look +lain +▁рас +Reference +▁sorry +▁supposed +ût +▁degree +utz +MM +▁desired +ły +▁len +▁alone +signed +▁Sta +Person +▁applied +▁Back +▁mars +Part +▁Did +▁externes +▁np +ongo +▁esta +Block +▁pou +adores +▁Studio +.$ +▁reached +bot +▁Juni +tons +itel +▁Gar +▁articles +▁District +▁trouble +lide +▁Found +ád +▁equip +▁internal +'], +▁async +UB +gel +▁ai +ensure +▁appeared +▁$_ +▁maximum +▁Си +рь +▁announ +лась +▁cm +ган +aupt +▁latter +▁platform +▁dra +▁capital +▁solved +riz +edic +▁Mur +▁Top +тся +Panel +rule +etic +▁Ren +▁Wikimedia +▁TO +second +isl +▁hy +▁niet +▁loaded +dig +▁mayo +[: +Acc +▁bek +нию +login +tx +▁Fur +▁Santa +azz +▁conduct +▁India +Order +irth +tw +}+ +▁wieder +▁Edu +AV +▁``` +▁manually +▁Read +fortunately +▁Run +▁Award +▁Foot +*) +params +пі +▁native +rift +▁ä +ATH +▁yourself +▁prior +▁cit +äh +▁treat +▁meas +ributed +▁clar +card +ROR +illes +▁layer +auer +▁rat +bernate +▁stato +▁China +▁$('# +▁naar +zip +▁${\ +▁appreciated +▁име +ży +▁przez +▁Indian +▁Tod +▁Source +▁други +internal +ionale +Product +▁Men +▁upper +▁Every +},\ +▁printf +▁continued +▁nodes +лки +▁nice +modules +eign +▁Mex +▁According +▁undefined +▁binary +cut +Current +edy +}}{ +bles +▁вой +scri +eqn +Changed +▁köz +▁remote +вля +▁quel +▁align +▁пар +SV +yer +▁Californ +▁places +▁primary +▁conv +▁Juli +▁visual +▁Select +atory +=( +iser +▁intent +sur +container +iced +▁board +astr +omial +вет +зва +▁cru +▁Oktober +save +▁greater +▁inn +▁picture +▁То +▁obtained +Wikimedia +úblic +▁lors +▁mont +obre +▁civil +▁construction +▁Welt +▁Under +undert +▁edge +▁Liste +csv +▁experiment +localhost +▁Edit +greg +ová +ља +msg +▁Green +Dialog +Ident +▁JS +^{( +▁släktet +____ +Project +▁beskre +▁ber +▁wouldn +▁react +Hel 
+zw +▁Washington +orie +task +▁category +▁artist +anno +▁ook +ammen +▁Minister +▁declar +▁Key +,. +▁mach +▁ww +isen +Fran +▁Росси +бор +три +▁rock +quis +mos +пера +▁esterni +▁gold +Windows +%% +▁partial +▁weight +▁spr +}). +▁français +fun +▁thous +holder +▁gone +▁Č +▁rend +DA +▁answered +▁False +Buffer +▁daugh +.-- +▁Show +▁rect +▁Kre +dr +osoph +▁yield +urity +toString +aval +Pol +▁lock +imation +antic +Local +▁beskrevs +ités +grid +ут +▁_{ +сі +FILE +▁км +▁speak +summary +prop +javascript +zk +izontal +▁trois +▁Rod +prise +рово +▁odd +▁gest +▁produce +▁waar +▁Av +ribu +вання +▁finished +▁adapt +▁Sar +textit +▁Ce +▁Fa +osen +▁deriv +▁ship +▁opin +▁Even +gesch +▁suppose +▁Fer +ское +▁worden +sey +hline +▁Union +▁/** +▁vez +▁Collegamenti +▁Society +▁econom +ší +oi +▁orient +▁Teil +rent +лекс +▁solid +▁cart +**************** +▁cab +▁Message +dots +▁ég +▁twe +aga +▁naz +▁Microsoft +▁underarter +ppen +▁recent +▁net +▁resources +Ste +.\ +▁SO +лом +▁cele +▁lic +▁benef +ldots +▁serial +Integer +cles +▁miles +▁Ale +▁entered +▁Two +wie +▁includes +▁Each +elling +quer +▁Dom +pf +WS +▁straight +▁Stan +▁nos +ícul +atro +▁Center +FT +▁Inga +ilo +▁www +jsfiddle +nic +▁European +▁commer +▁girl +total +▁Star +▁suggested +pal +▁zwischen +писа +IM +▁handler +▁Program +xsl +ály +BU +,-- +▁vid +▁established +▁Spiel +ometry +unes +▁sit +▁inher +▁puis +▁être +▁Most +Header +insert +▁sist +▁favor +dest +▁entity +Cal +▁Therefore +DD +;; +▁Dezember +▁Rh +iments +▁returning +sto +▁Value +▁liber +▁Result +▁bind +voir +▁Tim +▁Movie +weg +ket +▁исто +▁friends +▁fn +▁él +▁&= +arden +fficial +▁community +▁api +Args +ieren +▁dann +omorph +adr +loop +uman +▁vous +bst +submit +\| +тин +Container +asket +?) 
+Sec +▁drive +Ass +▁swe +▁amer +▁mine +▁Ham +▁avait +▁Hon +▁après +▁Mann +ська +▁increase +▁ty +sky +▁accur +article +weight +▁sex +▁listade +/** +▁está +}}$ +argo +define +▁состав +session +ads +стви +▁Law +▁dialog +▁duplicate +▁ép +▁voc +fri +▁green +▁hidden +▁Island +▁diag +owej +mysql +teil +rä +ikan +▁José +aled +Runtime +▁train +▁Division +ниц +▁Span +нима +)=\ +тан +▁stay +▁foo +▁accom +▁hers +▁нау +▁Mün +ideos +static +▁ready +]` +▁visible +▁Hope +ulated +▁Cult +стро +Co +▁smaller +atura +▁perfectly +req +▁proposed +▁degli +Search +▁ich +Max +▁volume +execute +gre +▁sport +udad +PT +▁Records +▁cook +▁expand +бі +▁altri +ppet +arse +▁wet +▁Bob +▁FC +▁Association +uje +▁fel +▁слу +▁Big +/\ +Ge +while +{( +▁sufficient +Position +▁understanding +▁nue +▁raz +▁ye +hem +Num +▁Project +▁Its +▁hasta +enso +▁wire +Ret +uj +proof +▁relevant +▁partir +▁ago +ificate +▁domin +▁boy +▁plant +▁encoding +▁throws +▁Rock +zone +gang +widget +▁interesting +DER +▁demon +▁office +amt +äter +▁White +▁versch +▁dieser +▁Mount +▁students +▁Pub +▁Де +ija +▁Cy +▁California +▁abril +äll +▁чем +TV +▁més +▁declared +▁ю +ől +appa +▁Бе +echo +numer +▁posted +▁вер +▁године +▁weak +▁Republic +▁champion +ensuremath +your +▁Ober +▁Central +isa +анд +yy +▁fully +▁SD +▁Linux +▁Scott +partment +kon +▁contract +▁OF +▁ale +▁Ann +▁над +lah +▁Next +oren +▁disk +▁eg +atu +логи +▁games +Left +▁lu +▁finite +▁ки +▁crash +pher +exe +ATION +▁brother +Eng +tat +▁Integer +ному +▁colon +iqu +)). 
+ivi +▁Method +arten +Uni +vector +▁wood +рт +▁Ле +▁siècle +▁gent +} +▁contents +▁compan +Go +▁jou +uent +Async +printf +▁Model +▁kept +ASE +▁provides +▁Abgerufen +▁Gall +▁Alf +SA +▁Mem +▁kter +▁Bru +Android +(: +▁Украї +Ne +Min +atr +▁Hal +delete +odo +▁não +ène +▁calculate +Json +keys +ней +▁hence +▁ow +▁Lib +eno +▁Love +osi +wide +▁score +full +вод +▁determine +▁spaces +лова +▁peut +éral +ół +▁appoint +▁Tw +(); +▁pure +▁embed +ação +controller +▁married +▁Fol +famil +▁prec +▁recurs +pad +istration +▁respectively +[$ +autor +▁grav +iera +azioni +▁Bul +▁Australia +mond +▁Tro +▁Ele +packages +msdn +▁Als +▁przy +ART +▁charge +▁applications +Unit +aren +▁sudden +ometer +▁dot +acji +ктор +imin +ening +▁donde +▁Ho +tree +mb +▁drag +aje +▁invalid +▁finish +laim +▁feed +▁Nap +room +images +▁сай +▁succ +iffer +▁año +▁cual +мери +DR +▁Bilder +бра +rait +pan +ень +▁distinct +▁Kn +önig +anced +▁loading +▁Techn +▁Sel +mus +▁rail +▁student +▁notice +▁sla +▁Да +▁guard +▁Day +вали +Option +aison +ipp +▁Jun +▁fell +▁absolute +ове +debug +▁Sud +пы +ugins +▁views +lay +▁surr +▁stood +▁ві +selected +гі +▁attributes +final +enda +▁Bon +ners +▁Wer +bur +ittel +▁moving +▁Plan +isches +Java +▁basis +▁Bus +▁Au +▁Ill +▁время +▁цент +handle +ступ +▁Far +▁oraz +ocr +▁seit +onder +дом +:/ +chor +▁Town +▁definit +react +▁piece +▁Karl +CI +▁Application +unter +▁formed +▁пу +Bo +▁Daniel +▁пла +Body +})$ +▁были +▁earth +гла +There +▁стра +▁ville +▁centre +) +▁helpful +▁++ +▁CG +izione +▁Game +▁Which +▁pip +▁Portug +DS +▁describe +▁checking +▁manager +BO +▁Bundes +buch +▁decided +▁Jahrhundert +▁fif +efficient +anci +braries +▁fails +▁kernel +▁Gl +▁Nacional +▁proceed +▁fuer +▁living +▁successfully +▁faster +▁contre +▁prison +ORT +help +▁autor +ław +ają +▁Arm +▁provin +▁naam +/# +sed +▁gesch +▁мар +esk +term +▁Tex +iring +▁tools +PDF +▁ult +issenschaft +▁couldn +ding +Dep +{- +▁predict +antage +▁Like +▁Би +tools +estra +▁ki +▁Jim +star +▁remark +óg +nabla +▁Although +mode +Host +▁strange +None 
+black +▁Festival +▁IS +anza +▁(- +icket +кола +▁Jes +▁flex +▁À +▁Network +▁EX +▁enero +!” +▁Ort +▁alors +▁Original +▁zo +ными +▁spl +Draw +yond +── +▁Ot +▁dram +▁division +▁efficient +▁Га +▁vier +nak +LS +▁spirit +zeichnet +▁dici +clear +copy +yar +▁році +usqu +▁nous +▁blev +жде +Arg +▁performed +▁Make +▁Carol +etto +▁Sand +▁Disc +Enc +rero +hash +▁focus +▁attention +▁agre +▁divis +▁было +▁ej +▁march +▁phase +ías +▁phil +▁Pap +▁river +▁caused +plugin +▁Team +uler +▁$("# +iej +ISBN +nam +▁fight +vid +▁Lud +Selected +:@" +▁Pod +▁années +arios +▁deutscher +▁NA +▁ию +▁dictionary +▁Ла +▁Tri +èn +▁political +ridge +atten +▁circle +▁transport +emas +FC +▁replaced +▁Aud +iska +Configuration +▁soort +▁Не +▁sequ +PRO +▁bud +▁{{ +ließ +▁Mas +ders +usammen +esa +▁Ly +вро +mac +▁испо +▁suc +uy +▁illustr +▁primera +ilation +▁storage +▁params +kaz +▁terminal +раль +▁holds +лось +▁nad +”. +▁octubre +bul +▁hus +ULT +▁également +▁Mill +ład +▁contiene +"? +▁>>> +Que +   +▁plain +ativa +ocker +Names +▁Jud +▁agree +▁Gemeinde +lare +каза +▁starts +▁price +Target +cus +▁Instead +.; +▁alternative +▁вла +IE +▁organiz +inu +▁completed +▁carry +atom +▁depending +▁Our +▁insp +▁&\ +aily +irection +фа +▁defe +TAC +▁designed +▁voir +break +▁partie +▁Jahren +▁studio +▁jour +▁Notes +fire +house +success +▁Juan +JS +▁Custom +▁besch +▁stated +bootstrap +ött +ozzá +▁CON +hav +▁sleep +eda +hot +ánd +▁Sy +▁temps +amar +▁scal +▁ast +▁opening +clipse +▁programming +▁letters +▁profile +nah +▁beyond +▁Further +faces +▁chart +зда +aign +ній +▁Rol +овано +terior +wed +▁herself +▁ng +anguages +}=\ +ynamic +▁jug +▁Example +▁(† +▁playing +▁usage +▁managed +▁Natur +тери +▁Et +eria +▁daughter +нием +Fragment +▁hol +Fl +ографи +▁ihn +üh +instance +▁comun +▁truth +▁само +▁implemented +▁anyway +▁Cro +фе +GC +ubuntu +types +ês +.~\ +fold +▁joined +?? 
+▁mé +▁wild +клю +rowser +▁Home +skiej +▁JOIN +▁juin +hof +▁dataset +жду +')) +▁miejs +API +▁edited +ools +▁seeing +ijd +▁procedure +▁Bras +▁signed +▁externos +▁disapp +▁Direct +cyc +▁consult +örd +Widget +cious +sect +▁Ди +▁wind +▁Archivado +aml +сс +Wh +kbd +▁Army +▁suffer +artifact +▁resolve +▁Sport +▁це +idas +▁tax +idi +▁actions +пра +pués +▁naj +False +▁chance +▁тако +äd +▁dol +▁env +▁basically +▁Council +zte +▁displayed +nil +complete +▁Lem +iance +▁основ +▁depend +plom +ensus +uts +▁Hot +bitr +▁validation +abb +▁тре +km +zd +öff +WE +▁interested +▁{" +aro +▁correl +▁dedic +▁lists +▁Bibliografia +▁earlier +program +▁première +front +Tab +ству +drop +▁fear +▁Enlaces +▁Capt +▁realiz +▁hal +▁instances +▁susp +illing +%; +{} +|| +▁partition +▁Build +▁wo +▁Пер +▁director +▁Sin +тия +rsg +ouver +▁nearly +oda +ктив +▁sir +IME +▁janvier +▁Win +Build +ieurs +INE +double +Last +▁policy +store +▁observed +▁familie +nica +rey +зь +▁Year +▁developed +▁Institute +▁reply +Comple +ician +▁Guer +▁dall +▁desp +▁Football +Empty +cken +unda +▁Ur +▁ig +▁Atl +author +▁Bol +zig +nat +št +security +onic +▁pes +itan +▁Extern +jan +VAL +▁им +bold +▁ва +▁Мо +▁disput +▁trick +▁ped +)^{ +into +Sim +▁parallel +fox +normal +inent +педи +hold +OK +▁chem +▁twice +▁username +ič +▁representation +▁journal +▁:- +▁batt +\% +▁certainly +▁Exception +eps +shot +ategy +Show +▁Carl +rig +▁reported +bottom +TF +▁Francisco +nap +▁Championship +▁court +▁sources +iour +▁conserv +dict +▁Ру +IB +▁Ve +▁№ +▁ER +")); +▁Point +azine +▁internet +дна +▁carried +▁Field +axis +▁Sun +▁ave +пис +ян +asy +▁julio +▁depuis +▁suggestion +[[ +▁Archive +ęp +▁Pra +reh +▁demonstr +фі +cmd +▁wasn +▁phone +upload +aya +тора +lines +▁indu +▁vot +▁espa +▁bin +▁после +plan +▁junio +orial +free +sterreich +▁ду +▁linked +▁enable +PC +▁density +▁Egy +yo +endre +▁съ +▁italiano +▁AR +▁Pers +férés +▁скла +Var +▁Once +Red +buffer +▁Enter +▁Š +imiento +Store +▁health +vat +IST +Oh +▁kw +▁riv +▁somewhere +ografie +private +кти +▁delay 
+▁Http +job +rael +empor +▁diciembre +ête +цу +▁commit +oso +Values +▁headers +transform +▁processing +rå +▁Ah +▁Node +------------ +▁faire +▁hun +Player +▁review +гда +▁limited +▁Property +▁serve +riage +▁Master +▁kann +crete +phere +ёр +▁chief +▁scene +kin +▁uniform +▁febrero +"} +illo +ITE +ouvel +usepackage +enth +▁quickly +Lambda +xes +▁cells +rog +amin +▁Мар +▁mayor +player +++; +▁Насе +▁safe +▁veloc +▁обра +Database +neh +Vert +▁fle +▁фор +▁foreign +Abstract +▁magn +▁modified +▁military +▁monde +▁Action +▁bank +Serial +▁continuous +▁gel +▁physical +▁introduced +uture +rick +▁presented +▁Prov +▁Both +Pos +super +&# +▁finding +nel +unde +▁från +skim +▁Hill +fn +▁Canad +▁intended +ozzáférés +▁juillet +▁Wars +▁successful +▁charg +iele +omething +oku +fetch +▁}} +bank +operatorname +▁Color +▁Card +tu +▁", +wid +▁gep +XML +================ +▁Virgin +ährend +licated +Dir +zero +▁Kal +▁Party +▁å +price +don +▁warning +▁Bad +▁Supp +▁Liga +▁Pierre +Record +ulator +▁Rome +▁theorem +▁entirely +ским +het +▁dopo +Next +mlung +wig +▁Ath +▁Sou +licher +▁sudo +ests +хів +▁septiembre +▁micro +▁trop +fit +Core +▁Radio +▁Organ +▁Power +CF +▁Last +▁oppos +▁offset +▁regia +▁minimum +▁helped +andon +ifying +ruit +enschapp +▁bere +VM +▁Awards +▁agr +ynomial +enced +▁devices +▁bot +▁firm +▁writer +▁ring +.- +istes +lä +▁mel +entation +▁Schw +▁nome +▁pobla +▁woj +▁ul +ento +ых +▁resist +▁remains +▁Ca +aña +▁Court +utable +entially +▁trat +▁Visual +▁restrict +▁previously +cation +▁осо +▁MySQL +för +cala +▁culture +live +▁accepted +Did +▁hous +▁selection +▁decre +margin +urb +▁Inc +▁Many +ibt +▁succeed +Binding +cí +▁Rog +▁shouldn +cloud +▁dz +вав +▁pix +small +▁projects +▁OK +▁latest +▁references +Program +▁erst +▁як +▁kam +▁Camb +ellt +öd +none +▁jusqu +king +▁Ped +assert +CS +rito +essa +лько +▁Von +▁Edward +▁impossible +np +words +ielt +▁Page +lers +▁pier +▁области +ittee +▁([ +▁trust +NG +redu +<< +rial +▁products +▁Ern +rière +гов +▁Reich +▁Road +▁nested +Display +▁strength 
+ografía +▁announced +▁Science +▁райо +Parameter +▁Task +uments +▁adopt +▁Only +ють +▁cli +▁lem +stood +▁FI +ências +ponents +]$ +comment +▁ya +should +ike +tim +ellig +▁sending +▁ajax +▁noviembre +umes +▁weiter +▁Dans +opp +▁septembre +otimes +ző +▁ep +vere +▁oh +:= +▁Song +”, +▁viv +▁queries +▁vá +▁décembre +▁unable +▁erh +▁`- +▁Lee +▁ersten +ôt +стве +TS +▁fragment +▁wide +▁suff +▁dut +▁Vere +іс +ading +iego +icago +▁Argent +orer +ennes +▁Leb +linux +acing +▁broken +tp +ío +abeth +istas +gew +ième +cas +▁preced +▁Dal +▁compared +equiv +illy +teen +▁Console +▁strict +itaire +▁ED +entials +▁perman +▁tous +▁geme +▁extrem +▁окру +kg +▁heavy +▁avril +▁anti +▁octobre +utf +helm +amples +▁(_ +aken +▁dear +▁opinion +▁fish +▁Alexander +iw +им +cadem +▁reflect +▁др +▁trib +common +▁clearly +▁saf +="@+ +▁Мос +сите +eqnarray +nung +▁relationship +▁Sem +▁killed +ted +uno +▁лі +▁wid +anning +▁panel +▁Leben +▁ruby +ansion +▁aren +tabular +alet +}$$ +▁Lake +▁suite +▁minor +Hozzáférés +▁xmlns +DIR +driver +ints +▁vic +AND +prim +сылки +▁Ox +TC +rivial +atie +▁eight +▁conflic +angel +▁Begr +▁explicitly +ются +▁Dev +render +▁reprodu +▁cré +Gu +MB +▁kön +▁remained +▁kl +хов +▁byl +Phi +▁detail +jav +▁mouse +Bas +ię +asser +hs +▁shift +▁últ +rand +▁btn +raz +▁pul +▁statements +filename +▁prompt +élé +ikz +▁Sus +▁debut +Stat +forms +▁Hein +stadt +ennis +пол +arante +цій +▁queue +▁reci +▁sta +ynchron +centering +Some +Graph +▁tested +▁Kunst +ом +▁Nothing +ieu +“. 
+Bundle +▁oficial +allow +▁React +▁Library +blue +▁verw +▁pare +▁Friedrich +▁aware +Exp +▁effects +▁горо +lopedia +▁Ven +rale +▁Final +▁propos +lacement +kten +▁novel +orter +▁Germany +▁django +▁transition +▁happened +▁beautiful +▁neither +▁libraries +▁hide +alg +▁aspect +▁forget +cademy +onte +refix +▁cloud +ned +cdots +register +nym +.): +▁Jew +▁très +ниче +▁Dor +▁proc +▁gan +▁є +▁Sav +ví +Settings +▁Vari +▁cours +Ro +▁conj +▁reasons +▁reader +лександ +icate +}), +▁tasks +▁Ray +▁ric +Ke +onie +rf +)[ +▁subsequ +▁Turn +▁VIAF +mathsf +HE +▁declare +▁protocol +▁PC +цион +ViewById +▁animation +▁confused +вич +▁enabled +owo +ást +öt +▁mand +▁Rail +fields +▁Kap +▁algebra +▁Су +férence +▁Current +сно +▁Lim +Params +▁Antonio +▁tv +late +ifer +Entry +▁Serv +▁musical +▁trace +▁scient +fic +▁forgot +video +▁older +Tree +▁uns +ники +▁Europa +▁Zwe +▁бе +▁vec +жу +▁▁▁▁▁▁▁▁▁▁▁ +Match +span +▁blank +▁später +▁Ty +▁dict +ña +▁confirm +▁vý +зан +Rel +film +▁Rot +▁Hy +ках +▁demand +▁minist +▁Madrid +▁usual +spiel +eros +▁tutorial +▁Ссылки +sys +циаль +▁spread +▁convers +▁roll +artifactId +▁Number +▁symmet +▁Mult +expected +▁axis +▁matching +▁food +groupId +Mapp +▁свя +▁vend +Found +otto +Cat +crit +istent +▁drei +▁ended +▁Tele +component +▁involved +▁Estados +▁danger +▁chain +▁Prom +hom +▁polít +cop +▁nap +rif +plements +▁vent +anna +anted +dated +anth +▁threads +зова +▁станов +▁eerst +buf +heid +▁Ru +▁Prim +▁migr +▁Unidos +▁arbitr +▁roman +ountry +ultur +▁König +▁annot +aching +▁Haupt +umin +▁hem +ckets +bau +ection +eft +▁packages +▁Kur +thur +▁pays +liament +▁Бу +▁cada +points +ocket +▁verb +лее +▁submit +▁san +ruby +▁east +kov +▁Verlag +▁spot +ppo +Each +jekt +▁Biographie +▁news +▁país +ufact +▁dia +кова +▁accompl +▁Ét +ilities +▁ihm +invoke +▁append +.), +▁lab +anging +istan +resol +▁Section +Parent +moz +Mat +styles +unden +“, +irtschaft +ким +▁Finally +phen +▁Pac +▁ArrayList +▁recover +▁education +models +ped +▁happy +чу +▁guerra +media +OF +▁ensure +Mark +database +oggle 
+▁publish +OW +▁Bau +?. +▁части +▁repository +▁Matt +high +oven +▁ger +▁unknown +Amer +▁Brown +ALL +▁resulting +▁bor +▁poet +ними +Email +Font +▁hist +▁today +▁Berg +▁buttons +тал +▁sni +▁челов +Cre +▁union +▁zich +ishop +▁quando +Po +CTION +▁Cost +судар +erved +Note +Equal +лия +бур +▁abstract +stop +▁advice +▁icon +▁travel +BS +vens +▁batch +lique +sheet +▁ihre +emon +berto +▁assigned +ью +Phone +▁award +▁functionality +alla +▁Dam +▁ciudad +▁cluster +Description +▁sheet +▁Australian +▁». +▁"< +▁wondering +aine +▁represented +kappa +nb +▁sy +▁Kö +="# +▁seven +Directory +▁sister +plates +▁luck +▁remaining +▁Vill +werk +anni +etti +func +▁ban +ims +miss +agraph +екси +▁Ref +nitt +▁Gab +▁andere +▁jedoch +results +!\ +▁listed +▁loro +▁knows +жно +Rad +▁socket +multi +▁рі +rails +▁tar +▁gentle +sett +services +bound +igkeit +aja +▁cmd +agger +▁ba +▁Belg +▁Kle +▁wordt +▁fost +▁dimension +Ang +uming +Obj +нен +▁Marie +exists +тро +▁боль +emente +▁Jon +SERT +▁highest +aki +▁tres +▁circum +▁Down +ommen +urer +▁causes +venue +issance +▁influence +▁fat +реди +}\\ +▁entr +▁Sign +▁кла +▁binding +essen +▁Фран +▁Local +▁явля +appro +▁dependencies +▁talking +▁zurück +connection +Active +bbe +irls +▁Inf +wd +▁ис +road +▁conven +ět +вез +▁entries +esc +▁bits +asso +WR +ships +▁dés +esp +Make +▁familiar +Art +▁army +ctr +éric +queue +▁\{ +uela +amiento +ших +▁""" +contr +лле +FS +▁market +ång +citep +Ill +rank +▁sender +▁beim +рак +▁compat +▁occurs +▁diese +ститу +awa +▁iOS +▁Chinese +▁TR +▁Ken +▁Une +▁creates +▁showed +▁év +ologia +▁protest +▁Pf +▁squad +++, +áv +▁essere +зя +kol +▁slightly +addr +ân +▁reduce +▁\(\ +▁Dep +▁generic +Loader +ți +▁пос +▁occasion +▁Lady +entity +▁avant +▁Pas +aggio +\{ +пад +atholic +Password +▁respond +▁Non +AG +neg +▁ус +blob +cke +▁Consider +▁Care +iki +▁Chicago +inden +▁Cop +]+ +öm +évrier +кло +alen +▁maj +racy +orte +ients +ells +activity +▁runtime +NULL +▁possibly +▁stri +izi +▁mir +▁Version +prime +▁twenty +▁Mah +▁sounds +шен +clusion +acz 
+▁determined +▁Rep +▁Landes +▁wall +igi +▁reset +шо +yan +Met +ei +▁appearance +▁fois +▁nell +esi +ёт +loor +▁Ul +▁resolution +▁fot +▁throughout +▁ri +Level +pool +▁identity +▁janu +▁imper +▁över +}` +▁infer +▁dates +▁Standard +force +ockey +tera +▁distingu +▁presence +lica +▁leaving +itung +éb +▁establish +▁maar +adi +▁News +azon +folg +▁Hence +▁Ye +▁fab +▁führ +itmap +▁Vers +rov +Sign +device +Sigma +▁wetenschapp +▁Ps +PATH +▁torn +vest +стов +account +▁largest +▁percent +▁Women +▁img +tool +▁roce +▁ay +inet +▁août +▁polynomial +▁integral +▁areas +}' +▁hyp +loyee +таль +▁proxy +▁Wy +▁Мекси +▁escape +olar +▁mistake +)}{ +▁Pot +▁processes +"> +halten +zza +amo +кре +▁Wood +ør +▁сер +ocia +two +profile +▁Ast +embro +▁arms +inas +innen +▁msg +INT +▁batter +ignment +▁vy +Hrsg +▁Grund +roc +seg +▁decor +▁eventually +>, +▁pag +anten +▁strugg +}^\ +daten +▁rela +пов +▁коро +▁Bos +▁labor +▁Secret +ugen +▁jap +▁husband +▁Album +▁etwa +▁произ +richt +rach +bat +▁prepar +▁Stock +▁lack +хід +▁hogy +▁Chrome +▁Admin +▁comparison +▁increasing +нг +imi +Db +▁gef +ucht +ése +gence +▁Core +▁incorrect +▁assuming +ourse +ieron +▁Theorem +▁casa +jes +▁дере +▁`" +LD +äß +Deb +▁suiv +▁Bank +libs +▁Leon +▁quart +▁professional +▁tiene +▁accomp +стер +▁UK +NN +▁lí +ця +kel +▁• +▁dise +onto +▁má +ifs +bild +▁compute +▁éd +ję +▁Mé +▁languages +▁Times +cen +▁авто +ým +enez +▁upp +▁méd +▁cuando +од +Intent +eerd +▁Tal +offset +▁haben +reme +▁Stack +▁dri +▁seinem +▁février +▁combination +▁soll +▁movement +Spec +кры +retch +Offset +Root +Ар +wart +▁Follow +▁Social +ников +▁→ +Don +▁harm +agr +nego +resource +▁Luc +▁seinen +▁Department +▁Update +▁Texas +▁reve +▁Pos +▁shot +othe +▁repeated +▁recently +ában +aks +пан +▁cha +ohl +▁tend +▁дво +chts +çaise +pling +album +ej +▁`[ +maps +▁units +▁ +▁pří +pandas +▁Plus +yll +▁terror +▁crim +▁zak +issue +panel +svg +▁reb +Customer +switch +обра +▁Championships +clo +atte +▁anymore +▁excellent +▁opportunity +▁Bahn +чин +eting +▁incident +tom +Pers +bben 
+ственной +их +router +▁newly +▁silence +▁GNU +▁Rails +▁Amb +▁Qual +▁Schaus +▁Sohn +▁ALL +▁royal +▁£ +wię +▁entfer +▁Remove +▁hardly +Using +лог +▁Ich +▁derni +▁Connection +fish +▁Inform +▁Ener +roit +Bbb +ViewModel +Video +iley +▁много +▁Gem +▁compreh +enumerate +ulas +▁Bah +▁Yet +BR +хра +▁county +▁Hist +▁Гу +▁Ј +▁mari +▁Clar +Bitmap +▁Cz +▁mån +▁mere +▁musique +also +dates +▁DVD +▁gol +fony +▁Castle +▁фами +▁arrang +▁Business +▁Kaz +▁osc +▁secolo +▁affected +▁Health +reb +editor +▁owned +tl +▁ví +чних +кви +▁devient +Mutable +▁tegen +Register +єю +▁caracter +лли +▁nouvelle +oko +ichtet +▁evol +▁Hab +▁militar +▁puts +endif +▁Davis +▁Scotland +regular +▁Context +ispiel +▁Gallery +", +▁arc +▁INFO +▁cod +дів +▁varchar +▁toujours +atial +▁hanno +▁профес +▁launched +▁населення +▁ton +aused +▁із +▁tö +▁Pur +▁olymp +ARN +óm +▁august +▁furn +▁Colomb +▁Staats +hora +▁мор +canvas +▁grave +▁composition +acja +▁которые +▁чо +General +ані +▁Johannes +кар +▁част +▁Васи +ssh +▁replacing +▁<> +ців +laus +eny +ähl +▁marg +cience +▁instruction +▁који +Editor +▁fundamental +mund +▁exceptions +▁plate +▁Lis +▁deren +prep +▁januari +Scope +ynast +rv +orsz +▁Tony +▁ді +▁одна +▁sab +oti +jel +▁generator +▁'. 
+▁sharp +▁только +▁accounts +▁že +▁foram +▁gouvern +TIME +▁Soviet +▁Gé +▁exped +▁ordinary +▁Conserv +▁compla +tei +▁captain +▁Samuel +▁Dark +▁він +▁delight +recht +dia +esses +ulp +шки +bez +▁detection +▁cookie +antry +Multi +oba +▁joy +▁safety +|^ +pod +adém +▁Chron +▁Django +▁ehemal +kh +èle +▁poc +Bottom +launch +nem +▁GROUP +ního +▁Gib +sdk +BE +▁Gene +▁Staff +▁subsequent +icion +▁victory +▁canon +izar +izia +▁mate +▁layers +sudo +schule +periment +ület +ARCHAR +▁террито +▁measures +▁zou +opsis +нами +tbody +▁ese +sterdam +▁photo +ynchronous +setminus +▁loads +▁pleasure +▁meille +}\, +qual +▁favour +▁rod +Der +рабо +▁pressed +rę +ieving +material +virt +▁capable +сло +ushed +▁побе +usetts +unsigned +ków +▁ov +egeben +▁applying +▁galax +▁Oracle +▁Stuttgart +Infl +achusetts +▁deel +lire +▁statunit +▁Politiker +▁beauty +)> +▁Columbia +▁zewnętrzne +▁програ +▁dx +cknow +▁dub +unächst +findViewById +▁Mand +áll +naire +▁destin +isting +aggi +chart +▁justice +Simple +▁unfortunately +ір +▁questa +▁Governor +яв +▁música +▁equipo +▁Dest +elect +StackTrace +зом +proc +entin +adora +▁Лю +▁registered +HL +facebook +▁storing +▁Currently +▁quadr +Standard +trim +ears +sender +▁Vas +▁edific +▁Bür +▁Country +tha +;" +nor +▁Doctor +rument +Gen +▁Buen +rade +▁kun +navigation +Pay +▁captured +▁struck +venir +ément +▁Tree +▁xx +▁narr +льного +▁installing +▁association +▁inserted +erner +validate +▁lut +▁glo +▁technology +▁Place +$? +▁zv +слі +EP +▁atmos +ugo +ért +▁Werk +▁%} +tele +Span +▁Raj +▁Personen +▁Cant +▁combat +▁observation +parameter +▁agreed +pur +▁shadow +▁gł +Keys +Cred +ouri +▁pale +ické +▁Week +▁Prime +>. +Initial +▁один +▁'', +▁учи +▁Inv +cola +cible +▁Theatre +▁bem +▁satisfy +xl +▁разви +▁pixel +lán +▁twee +çon +нения +▁AT +ège +▁Mort +▁mysq +ften +▁пес +éma +▁Services +customer +▁AWS +ът +▁Ach +%. 
+▁clarify +▁университе +xture +umi +▁så +▁Pel +serial +URI +▁rg +▁соста +chestra +].[ +wen +▁Londres +▁anys +DataSource +▁районе +▁rein +▁metadata +umble +arbeit +hner +cient +▁norte +▁она +▁scored +▁ray +▁февра +▁protagon +▁Sac +▁commonly +LinearLayout +▁applic +▁мая +За +▁accessible +iewer +flag +▁Rück +äu +▁erano +▁authentic +▁Ry +▁неско +▁embargo +▁dry +▁reasonable +▁Module +▁acceler +▁interview +▁Creek +▁alpha +serie +They +ючи +▁Hof +▁CR +modal +▁sequences +closed +)}$ +▁Чер +▁ORDER +Rightarrow +hausen +}}_ +▁també +▁magnetic +▁McC +▁winning +underline +▁Billboard +naio +▁liqu +displaystyle +timeout +▁considerable +▁eben +ifferent +anu +▁Сов +[( +▁:-) +leitung +formed +▁Manager +▁onclick +TY +тах +CV +runtime +poque +▁Ло +Temp +loaded +▁!== +▁singer +far +▁Comple +▁Österreich +Policy +▁worker +Wrapper +obi +▁discussed +▁buy +▁января +▁Din +▁ged +ској +Europe +▁tall +hos +лаго +▁Block +▁identified +ListView +▁attempting +▁typical +psum +oster +▁журна +Pe +merce +▁unexpected +hui +letter +▁nuevo +▁або +▁VALUES +▁Iz +Flags +▁TRUE +ización +▁growing +estre +▁poly +▁Stone +▁VIII +▁localhost +ählt +▁embedded +jdbc +▁convention +▁scala +сок +▁analog +▁"+ +цю +occ +▁litt +PN +▁актив +attributes +▁Ferd +▁azure +ști +ños +ping +▁teacher +}& +ipe +▁Nob +▁има +Bind +▁magic +▁Transport +ixel +▁computed +agna +erst +HA +Wait +▁authors +▁;) +clam +▁Pennsylvan +▁drug +▁vain +▁employed +▁individuals +▁ange +utat +▁$- +correct +▁experiments +Argument +▁IB +▁père +▁Brian +berger +Mac +iast +Perm +Cast +▁{}; +▁Student +▁statt +algebra +▁equals +▁projet +▁président +ActivityThread +▁einz +enia +rez +essional +▁августа +override +news +▁planet +nn +▁Wis +твер +▁Valid +▁Gef +град +▁eig +antom +▁Meister +flags +fficiale +шая +-, +ationen +mouse +standard +Single +▁bol +isis +▁fruit +course +itants +▁étaient +TextField +▁фон +▁aircraft +▁ISSN +▁western +▁representing +Esp +▁Else +▁sizes +▁satisfied +otos +UD +Final +ój +ève +▁Roy +ffen +▁salt +▁Label +Sk +▁кре +▁Литература +▁см 
+Attributes +aye +ськ +▁высо +-) +oses +calcul +▁Cannot +Generic +emo +▁Autor +лён +лага +vote +licates +rus +éli +opf +atique +scala +▁Ohio +▁Britann +▁bef +▁Евро +▁Career +isée +ót +bose +▁Бер +▁Controller +pole +▁allen +▁hack +▁extent +▁calci +Mer +▁summary +Mart +▁historical +imat +bud +▁FOR +export +edi +Mapping +▁Ay +▁Ruby +▁definitions +▁{$ +▁yours +rias +Touch +▁Gaz +▁Autom +▁истори +▁delen +▁Kinder +}}% +▁performing +FR +▁Sig +▁Brad +bras +▁Jar +pkg +wr +▁Pays +NC +▁opposed +Try +▁везе +▁Bog +▁writes +▁stories +▁mater +▁stagione +▁sty +▁compatible +heast +▁Guy +egründ +▁identifier +▁heads +пози +▁stup +▁tf +▁још +▁Hugh +▁cards +ovy +▁Toast +allas +▁públic +▁assumes +▁чемпиона +ycler +▁Junior +▁Fich +▁estimated +zerw +dialog +шин +shell +▁них +▁pitch +дол +outube +▁Santi +OnClickListener +▁Magyar +▁vue +ião +▁`# +collect +▁Rou +analysis +istrzost +▁Digital +▁crist +riere +▁campo +Us +▁circa +▁Component +▁NSString +pd +▁prince +▁invoke +▁Marine +Allow +estic +ристи +bone +туры +▁passion +áció +▁orn +вед +▁invari +▁ні +Remove +encies +ilib +▁Director +"" +▁Conse +googleapis +ók +▁Укра +▁Having +Domain +ierz +нологи +Cho +undefined +alloc +▁pied +▁fraction +bia +▁поло +ugno +minister +▁principale +▁refused +browser +*, +▁Hospital +▁universal +▁Ernst +who +▁Gard +'_ +conde +▁[{ +sob +▁Crit +▁декабря +▁punto +▁eingesetzt +▁tör +▁Ni +▁worry +▁legend +▁були +▁komm +rijk +effect +Ori +RES +▁Peters +▁Baron +▁Got +▁honest +äre +ász +▁noble +▁conclusion +▁formatting +▁otto +▁deleg +мб +ptop +▁sends +urname +▁festival +,‎ +рус +▁doch +subject +▁careful +quent +▁Load +temperaturen +▁rue +Memory +ța +iona +▁dentro +▁begann +▁Aqu +▁scientific +kań +лок +elde +▁Those +quier +actér +▁Auflage +)' +▁gradient +integer +▁Import +SK +▁Status +▁explo +AE +Shell +▁Paulo +.» +}' +havior +lei +ulf +▁geometry +prev +empl +▁Lé +anson +▁Alice +prototype +READ +icular +▁бі +▁deutsche +▁Represent +sites +▁Mean +▁diss +▁Zur +▁през +PAR +▁'# +▁Dra +сон +▁steht +markt +▁ease +Drawing +=% 
+Stop +▁serving +▁także +▁DNS +▁literal +Die +▁вос +▁senior +acion +▁ubuntu +▁Frankfurt +▁Sunday +áb +▁journey +issa +berry +▁sep +▁ion +wert +ország +serve +▁Milano +▁века +рах +▁июля +▁manera +▁stations +▁adopted +▁anybody +VERSION +FE +dorf +..., +▁образова +Logger +фициаль +WRITE +▁ham +▁Future +oten +▁AG +▁trained +▁Nich +▁university +▁Olympics +▁doit +▁cultural +Conf +▁Conference +orno +▁MP +▁bou +cin +High +annte +▁displaying +▁chapter +▁Frauen +▁realized +▁attempted +▁preferred +Dat +▁trouve +▁intention +▁Notice +timestamp +*( +▁Ша +anas +cla +isz +tbl +Arr +▁inverse +▁terrible +▁occupied +JAX +<- +▁Philosoph +▁Corps +builder +▁begins +▁census +.’ +▁proven +metric +▁increases +wich +▁ABC +projects +▁Thor +▁confidence +▁ufficiale +elm +▁garden +▁robust +▁così +iedz +▁Islam +▁Address +▁divide +▁Eu +catal +detail +ependant +fg +▁bew +▁fis +▁BO +▁wsp +▁pipeline +hd +▁Session +länd +iveau +estr +▁particle +▁laravel +pic +▁nau +▁fins +▁Vil +▁fus +▁quasi +operation +▁aller +▁analy +▁Он +▁Mes +▁опера +▁handled +▁deprec +tto +▁Ek +▁stran +▁anglais +jure +▁Silver +▁closely +enkins +anos +sted +▁сентября +brand +ньо +▁présent +rok +mount +▁Anthony +▁Furthermore +inha +▁архи +▁разли +▁октября +▁pint +ný +pts +▁italien +▁реги +лез +дина +atherine +Internal +Question +▁settlement +▁Все +▁folders +дри +▁valor +▁Miller +▁Assert +▁patient +▁Nieder +▁EP +▁Agr +▁onde +▁scop +sequence +▁PL +▁seek +javase +▁Vector +▁ná +▁categoría +clone +NR +available +▁Besch +▁eclipse +wicklung +deploy +enie +▁") +äst +▁sync +CODE +▁Че +▁floating +/` +▁retired +deb +▁particul +▁collected +▁downloaded +nice +▁Buffer +▁Account +▁maggio +▁реда +▁sales +▁statunitense +▁Ki +▁Ferr +Lock +▁Isabel +clar +▁pov +atra +▁Frau +▁sorting +▁phrase +▁апреля +▁деятель +▁André +definition +writing +éré +щу +▁Ord +▁rum +▁Turk +▁Ivan +theless +▁ги +▁sake +▁Based +deck +orus +▁tutti +▁blan +▁Пу +Detail +▁Но +▁Sky +▁près +мой +coln +ческой +eti +▁arrow +▁Cha +chmark +œur +fab +куль +GridView +▁Background +sn 
+▁seguito +▁nic +cou +тів +▁bzw +addEventListener +sync +azzo +abstract +assets +▁Dru +зд +ordnet +▁bigger +▁initialized +каз +ogene +viously +▁guid +scheidung +▁Zent +▁frames +rieben +▁issued +▁dow +▁describes +ilst +▁criteria +▁gentleman +Basic +nez +Dev +Move +▁estaba +▁settembre +circle +▁fais +▁myst +▁archiv +dynamic +jà +itas +▁який +▁dor +▁Amazon +▁neces +▁Marcel +▁ella +рок +▁Pennsylvania +cular +Pack +itage +▁Burn +▁RO +▁они +~$ +TeX +assign +▁beat +idense +acent +Alert +▁strateg +▁månaden +LOC +▁catalog +printStackTrace +()). +usted +▁Framework +ECK +▁até +Framework +▁attacks +▁Bert +▁тран +:% +arsi +notation +▁logical +weet +▁visited +bru +▁surprise +^^ +inale +remote +'}, +Syntax +iane +onnen +▁breaking +parser +apk +▁Miguel +▁§ +▁acting +▁gebru +AtIndex +ються +▁offers +▁prac +▁grant +ternoon +▁acquired +▁Ny +▁comma +ník +▁Step +inners +▁SA +▁wat +days +▁rectangle +dar +▁trac +▁Indones +▁feedback +▁breaks +partition +icans +▁Notices +▁improved +phan +▁differential +scripts +▁XIII +▁Labor +▁precision +▁seed +bundle +idents +hre +▁Douglas +uld +▁secondary +▁brig +▁confirmed +▁claims +Role +▁Jewish +▁před +▁hotel +▁compte +▁recursive +](#) +▁rotate +▁chrome +inea +%; +▁Environment +platz +▁Single +▁sevent +▁posting +▁dealing +parameters +граф +Authentication +touch +Az +▁gray +encing +boldmath +▁сайте +▁Za +anje +▁polar +▁ули +kil +▁hover +▁REST +▁Come +jb +▁Georgia +▁Estado +OutputStream +ћи +▁dump +▁Age +▁swo +mobile +occup +шего +▁constitution +good +aku +▁анг +ieck +▁Psych +▁roots +▁vest +▁годах +▁República +▁pian +igration +▁préc +▁generates +LY +(` +▁=~ +шения +▁Rah +▁connecting +ží +▁fő +▁appel +▁Railway +гли +▁développ +▁apo +fran +▁immediate +вого +Runner +äg +Something +▁généra +EventArgs +inction +gly +▁Due +▁prost +▁referring +▁jog +▁executable +▁Dream +acs +▁Cole +ampf +▁Bis +▁июня +lieder +тек +▁vb +▁mom +▁:( +▁dernier +'=> +▁этого +▁neue +▁Ча +▁weitere +▁alleg +▁reality +▁judge +▁Balt +▁thin +▁Ged +ieval +mx +ціональ +▁выпу +▁IX +▁blind 
+▁Motor +▁ша +▁approximation +dam +▁fog +кор +▁Writ +▁ling +▁писа +▁Mars +otti +Enum +▁Trib +▁merc +zung +vanced +cfg +нах +schen +"]. +bek +▁ster +jp +▁Rap +▁recording +▁peint +▁lets +änge +>"; +▁місце +▁caval +▁CSV +▁entstand +▁helper +endet +▁Gram +▁Diego +▁Bishop +TAG +▁ecc +▁Een +▁AV +City +▁Guide +hind +rical +▁Основ +Bus +▁zunächst +▁tick +▁Colonel +Thanks +▁ferm +▁granted +▁threshold +omorphic +▁Hun +enis +▁прав +▁які +PG +▁ws +▁technical +estro +klär +vars +ocrat +▁општи +onso +iba +▁Save +▁programa +▁въ +▁invån +>() +▁mejor +▁слова +▁replacement +▁impr +▁Francesco +▁Hotel +▁UPDATE +▁музы +ugs +vard +▁faz +inton +▁arts +▁Ky +▁Ils +▁sera +▁Volume +▁giugno +▁asym +▁Pir +▁NAS +▁Tam +ěl +Sequ +kmal +▁Eins +▁компа +obe +oor +▁heap +ctl +▁separately +reader +▁significantly +▁Lag +notes +▁sele +▁dedicated +▁Host +choice +wing +▁Titel +▁befindet +large +▁conten +JavaScript +▁deser +▁Gordon +спе +▁patri +▁Random +▁Returns +ым +рома +▁Studies +Sl +▁frü +TEXT +inate +▁Tol +▁everywhere +arta +▁orbit +▁Aires +▁Iss +▁też +▁diverse +▁numeric +maz +▁mise +▁battery +▁Akadem +нение +▁simultane +▁Dead +▁clust +▁otro +▁cerca +()`, +roz +ăt +▁MO +riften +important +▁jeho +▁findViewById +▁consequence +▁measured +ishes +▁sze +iendo +▁Wahl +strip +ARD +▁opacity +WORD +▁Ві +▁Location +rai +пен +▁rif +aussian +FileName +▁disco +ilen +▁vagy +licity +Border +▁Track +бом +fact +oka +▁gior +▁XVII +▁där +Site +ało +ská +▁pixels +vity +jQuery +▁sculpt +▁cargo +▁directive +▁wal +▁conna +▁Through +▁этом +Static +omsnitt +▁rund +▁claimed +зня +sha +▁rag +crement +▁fünf +▁rival +rin +slash +▁thirty +sleep +ологи +SM +gate +izations +vik +▁bless +▁Illinois +▁TE +uting +▁solving +GER +▁XIV +▁Indians +express +▁Heil +▁mujer +▁invånare +']); +▁aur +boost +GO +▁nin +tok +god +oter +)$$ +▁descend +рю +▁Language +▁diver +▁Assuming +▁frequent +чні +▁Biography +,[ +urm +▁walked +▁federal +▁Michigan +▁facts +▁Integr +LES +▁Alan +▁coup +Ber +▁particles +ће +Inflater ++( +Bound +▁Sü +Audio +citet +yect 
+▁nr +xe +▁Brun +▁_, +avor +▁discipl +alm +▁ноября +▁SSL +▁Kaiser +▁recher +ygon +▁regardless +▁configur +▁unnecess +▁Clark +PHP +▁FALSE +▁pad +$} +▁valu +▁disease +▁maior +▁hommes +▁Edition +slant +▁ending +▁settled +urus +hed +Pattern +▁година +▁Philadel +tikzpicture +▁coal +▁sede +▁satisfies +▁trim +▁bat +▁américain +▁luglio +▁поча +ffff +▁Target +generate +▁Zie +ția +▁gard +▁workers +▁Job +▁urban +ahlen +▁Building +▁neu +▁chron +▁Earl +gro +USE +▁XII +▁wealth +inae +▁Бра +▁libert +iros +:$ +lee +ieves +▁Justice +▁oil +▁Athlet +▁clo +Scale +▁lips +▁april +▁impression +▁perce +▁участи +vil +éch +▁equality +▁мет +▁annotation +ernal +▁Mach +▁intitul +problem +ющих +oplus +▁thousands +▁calculations +umps +▁triangle +phal +▁Dorf +▁dollars +▁denen +lès +olid +▁Results +▁Stadium +▁Desp +▁Eisen +imir +▁sotto +▁či +atable +orum +▁convergence +▁jeune +oking +▁живо +aining +pointer +culo +▁jsou +▁grab +akte +▁hoping +▁Mak +▁sag +origine +▁послед +▁Veg +▁theoret +▁Tru +nement +▁faces +Hor +Join +arel +▁около +However +▁catal +bourg +▁mysqli +acions +▁Initial +▁rain +iture +▁Sciences +▁Kreis +.__ +▁cinq +▁Auß +ithmet +itors +amazon +▁gap +▁ignored +adv +кої +▁часть +▁corpor +цер +▁crime +uous +▁налази +DataFrame +води +Ign +▁Lincoln +▁menos +▁Luft +▁Lind +▁Cook +▁materials +apped +ignore +▁откры +fried +▁gouvernement +▁fired +▁screenshot +сен +▁[( +▁организа +Graphics +▁проти +▁phen +craft +▁brain +▁Como +▁Everything +anes +IGN +▁nederbörd +▁Forest +zahl +▁Among +Qt +▁togg +▁variant +▁hill +писи +colon +▁dicembre +гор +▁Wind +ünstler +▁=\ +saved +▁nej +unte +utto +▁recens +▁sick +▁desen +UST +▁worst +▁Angel +odox +▁Province +▁Maz +▁agreement +▁Bass +▁segunda +onces +▁Linki +▁CL +▁já +itement +▁área +▁scalar +▁Рес +awt +sieme +▁juni +▁худож +ikus +▁lid +ppel +avi +▁balance +ipping +cussion +ческих +(". +Also +▁whis +HOME +▁brown +▁día +▁può +plotlib +▁Jahrhunderts +DK +▁anchor +...] 
+▁Austria +▁marca +▁gez +iously +▁lazy +xa +▁Channel +▁neuen +das +▁searched +▁staat +▁Так +▁Josef +▁Sher +pois +▁enem +▁accessing +▁неко +▁furono +▁pseudo +?> +▁estadoun +▁Види +▁motiv +▁recall +isson +ób +)-- +▁Erz +▁савез +Direct +соб +▁sho +völker +Ap +gens +ништво +▁Amsterdam +usk +пло +▁simulation +▁BC +▁Woj +autom +Alex +▁economic +гом +ikai +▁altre +▁'- +▁Weg +NotFound +йской +▁converting +phabet +atrice +bourne +alom +▁comparing +▁Zo +▁fla +вая +▁entra +▁charset +developers +ística +}> +▁Jazz +▁Howard +шта +▁clone +door +▁Pin +*** +▁silent +ecycle +isce +▁mud +▁Display +▁lip +▁использова +▁characteristic +▁sb +firebase +▁Bew +Calendar +▁uso +èse +▁Rat +▁esper +▁throwing +▁rodz +▁yards +▁grass +▁marker +▁Kos +Theta +▁organis +kernel +▁personas +keep +▁exclaimed +oslav +▁Entertain +нер +▁inwon +▁Rand +reduce +fac +expression +yj +▁differenti +aglia +▁templates +▁mű +▁prv +▁mois +▁gewann +▁була +bibli +demo +▁Anderson +▁ред +▁porque +▁Pologne +▁trip +▁exemple +▁Internacional +▁као +Insert +general +SESSION +berga +hält +unas +мира +▁yields +mapsto +spot +▁+\ +лла +▁precisely +▁член +shadow +Are +unal +▁dispar +▁título +nest +▁Low +▁prot +▁Costa +named +▁gained +lesia +▁administration +Import +branch +▁sympath +voj +▁EC +▁municipio +▁animated +▁directories +▁roof +ząd +imet +proto +bla +:] +have +atem +▁ns +▁sector +three +owane +wers +ових +rence +▁extr +igten +▁occident +ță +▁eat +▁hydro +ubernetes +[@ +▁Moon +▁Sho +▁elsewhere +üller +Upload +ланд +▁För +wissenschaft +KS +▁physics +tz +▁серед +▁Arbeit +▁мест +▁Gebiet +▁insect +Ah +izado +▁temple +▁annual +stad +▁habitat +▁AB +wort +▁repos +▁Neu +▁$(". 
+Vorlage +▁reprezent +estanden +Intern +.` +▁failing +▁Material +▁effectively +телем +▁гла +▁nahm +▁differently +extension +▁Verm +enabled +configure +nio +ciones +▁Beach +сона +▁copying +▁україн +▁призна +zh +Desktop +▁sost +▁subsequently +▁Lehr +▁ó +lär +odor +phon +nc +iterator +▁эти +▁europé +▁Toronto +ódigo +▁posto +ffe +▁crew +▁Schwar +Sa +square +▁beside +▁Мі +▁ath +▁advent +cji +written +▁russ +rost +HI +▁dice +cca +▁dép +ply +bigg +ział +ütt +▁одно +JECT +ському +nos +mock +Launch +same +▁jobs +▁widely +▁defines +▁Pse +▁neighbour +ющие +▁closer +▁располо +▁clubs +fly +шим +▁suffered +▁nar +▁lavor +Extension +itionally +▁grace +▁Campeonato +▁Christmas +middle +othek +elements +▁sondern +▁tarde +▁permanent +▁conclude +Seg +▁акаде +}", +▁февраля +řed +▁IL +jud +▁USS +▁Nature +ifference +Serializer +▁twelve +tid +мия +ческого +▁calendar +concat +▁intersection +▁PA +azure +▁située +▁kinds +▁ausge +▁rural +Theme +▁tale +noindent +going +rx +agi +wrapper +▁Coast +mbH +▁перед +spre +▁}\ +▁LI +znam +itled +Sample +uliar +*\ +▁resistance +stock +ked +▁HE +▁possession +▁Ring +▁magyar +outs +▁Secretary +nde +▁Wald +-( +▁ISO +▁afternoon +ionen +▁stops +▁constants +guard +bow +▁ers +▁Firebase +▁Clear +▁Holy +Win +▁titles +▁трав +▁contrib +häng +▁photograph +▁Distribution +ifts +▁aunque +comb +ADD +▁publication +▁служ +▁кня +▁ayant +▁restore +▁belief +▁vég +▁extensions +▁decom +вший +WT +▁parti +▁gioc +▁мира +▁issu +pipe +▁props +▁willing +▁nest +aso +pot +▁handles +▁фо +▁moder +▁ebenfalls +▁fighting +umbn +▁transparent +▁Krist +▁homes +▁voyage +Failed +▁Bird +▁Heart +Counter +▁Scottish +ática +▁arbeit +^{-\ +▁Sor +▁engaged +▁aside +▁Fou +▁wiel +▁reconst +ousin +▁hosted +▁classe +▁contest +..." 
+мом +▁bean +gem +▁consultato +▁bio +▁subjects +boBox +▁Schrift +▁dinner +ăr +▁równ +▁%% +bage +▁veröff +▁detected +ienn +rose +▁Ton +Complete +▁proto +ichts +STAT +Checked +▁inten +▁smile +▁strip +neut +'); +four +▁todas +Controls +▁thorough +rup +▁држави +ită +Protocol +Ка +▁expanded +extra +oport +▁Станов +leases +▁notion +▁guest +▁Islands +icked +▁Dave +▁reflection +liv +ální +▁revealed +▁sog +▁Tax +▁periodo +▁Weltkrie +catalina +qué +▁Father +▁Bir +expect +▁regression +iné +▁dabei +perm +мене +▁Abd +▁CF +arks +resolve +wedge +▁initialization +▁Véase +▁приня +stmt +▁income +MY +▁odkazy +▁Siehe +▁bodies +▁soc +Random +▁senza +ablo +▁regarded +onCreate +▁Magazine +▁Raf +▁Buenos +ил +))); +capt +redirect +▁petit +▁farm +▁rôle +▁статьи +     +subfigure +èces +ziel +▁окон +EE +mee +▁perten +▁représent +▁LA +?' +▁тру +▁rational +osof +▁kne +▁artists +Flow +▁Аль +izard +▁numero +actic +▁destruct +▁Пра +onsieur +qt +abestanden +ność +Connect +▁oracle +▁Stockholm +sizeof +▁gemäß +ACT +▁expert +utions +▁hacia +▁logger +▁fool +rypto +ær +▁cidade +▁составе +oker +▁Transfer +▁denied +Track +▁radi +zec +▁Historic +▁Einwohner +кою +▁хра +▁Category +▁Disney +▁swap +Begin +▁mientras +▁dance +▁tête +▁droit +erta +▁birds +▁convin +parator +дра +▁ES +▁Ressources +EGIN +ücke +▁Cruz +abling +▁"@ +▁metres +▁Beg +▁Gründ +▁Boh +▁mile +▁Technology +"+ +acco +▁ss +▁Fed +▁Hend +usch +itä +folk +▁absor +antal +odge +▁WHEN +▁Externí +▁Regiment +▁evaluation +▁Tai +▁vocals +▁experimental +embed +▁Minn +▁вме +prec +every +▁hoof +▁Fernando +▁Bibliographie +▁nag +amerikanischer +▁marks +▁UTC +▁uncertain +дия +olia +▁cup +▁fille +▁dok +useppe +esterd +▁Brand +▁Third +PP +nodes +▁Pad +▁loved +swing +▁surprised +ardi +▁GR +]" +▁equally +ihe +care +писок +lijk +rinn +▁\[\ +▁sons +▁tät +icamente +▁listing +iellement +▁nyelven +▁ds +▁agricult +▁Hermann +▁besides +progress +▁peculiar +focus +cn +-$ +ственный +ourg +▁wyn +▁conducted +▁Становништво +connected +▁bott +▁смер +▁Poz +unct +conda +▁савезној 
+▁havet +ligt +orted +▁entering +multip +▁Temple +▁Plant +typeof +▁Vlad +▁qued +▁reste +▁май +▁Very +ambiguation +▁challeng +▁respective +▁тор +Ctrl +▁absence +aru +вое +▁först +▁sq +▁Emperor +▁Ign +▁това +:` +adoop +▁Madame +▁gruppo +stud +▁externas +▁Александр +▁dign +▁живе +Amount +▁correlate +▁Fant +▁rails +fp +министратив +▁bought +▁filters +▁ancora +▁partner +▁quand +symbol +ulating +▁zd +awn +▁Grant +because +rable +\} +ísticas +▁уче +▁période +▁ske +▁Anyway +▁indexes +▁directions +▁RAM +chrome +▁apost +▁warnings +▁Airport +VI +abile +▁lord +provider +▁Ji +ostream +▁gemeente +tableView +Extra +cursor +eground +▁Moz +▁rib +▁morph +loads +elsk +▁MAX +▁Santiago +▁Him +codes +▁lanz +▁counts +rinningsområ +щё +▁spé +▁pierws +▁Sver +▁acknow +Boolean +▁фамили +▁Senate +шов +agers +▁Nueva +bil +kiem +▁Mey +wij +▁GmbH +validation +▁ensuite +inking +▁campion +▁financial +izon +Headers +▁deprecated +▁fonction +REG +▁volumes +▁Chi +▁encountered +lak +рая +▁continues +▁~[ +uerte +▁\; +▁Dok +▁weights +▁rh +▁Napole +▁naturally +sku +pas +▁gegründ +etr +▁Ku +icted +▁fabric +▁ASC +▁Entertainment +▁energ +клад +omon +theme +▁харак +▁draft +▁channels +▁desert +▁través +▁Lock +▁siendo +фек +même +▁packet +▁Mountain +▁Fahr +braio +пере +▁genannt +▁deployment +Pal +ног +стру +Prim +für +▁dangerous +▁szám +reck +▁popup +icky +inar +cowo +нцикло +ítás +▁plugins +▁driven +лев +▁"( +tta +▁Ú +▁eb +▁''; +▁knock +▁основа +▁maison +гля +▁Honor +tail +ritz +▁guys +▁combinations +ondere +▁Ald +▁fiddle +дав +urd +▁projection +▁También +verb +▁terre +rugu +▁september +▁= +▁Beat +▁Sax +vertical +кто +▁plants +▁Références +▁ogni +▁curs +▁SK +они +▁destac +"); +▁Sure +▁partido +▁Folge +▁Moore +▁wz +скус +ltre +ondo +▁pose +imos +бой +ципа +jus +..... 
+▁época +▁quanto +▁Support +geschichte +SERVER +▁Georges +enum +▁herm +▁nebo +▁Chr +character +▁*** +▁Forsch +iami +▁¿ +cych +▁fifth +sent +▁anderem +▁proportion +▁prest +▁Girl +▁drama +wand +▁Mail +▁Lux +▁který +▁Gesellschaft +▁Hinweis +nisse +▁mondo +Eq +▁perí +▁eastern +▁UEFA +uale +▁convex +▁поль +▁Hey +zenie +initely +▁Zusammen +SSL +ocal +▁canal +voy +▁Кри +▁között +▁cars +▁versión +Environment +Her +▁señ +▁spatial +ymi +Fire +▁veget +▁Wie +▁znaj +▁damage +▁endl +gif +▁quali +▁которых +ellan +▁mens +▁plug +▁abund +FIG +▁sf +▁confl +▁населения +▁principles +▁Gabriel +ibe +▁{% +▁població +ніципа +▁extreme +▁asse +▁vu +Mock +▁spielte +▁Aer +▁datos +endes +▁Gel +▁Gor +Christ +chos +Processor +▁instruct +▁picked +nahme +fahr +▁indicated +▁%. +▁ts +▁notable +▁qualified +▁Ал +Black +▁council +▁overhead +aci +année +▁initWith +bió +▁introduction +▁companion +▁expon +▁kör +oby +burn +gnu +virtual +▁intellect +▁держа +'+ +бле +▁strictly +▁recognize +hour +▁Wrest +ennen +$). +fff +▁Centro +▁Pitt +▁dział +▁cela +▁francese +рами +special +▁Dup +toire +каль +COUNT +▁Brook +▁руково +publique +▁seconda +▁compt +▁bland +Before +▁Pack +alty +öder +▁intervals +▁Datenbank +Movie +▁transm +▁tap +▁поч +fon +iai +▁fib +▁wyd +▁hung +▁alive +Clear +▁pushed +▁tuple +achen +гово +▁revers +▁augment +▁challenge +lost +▁deuxième +structor +▁mehrerer +atural +Split +стем +шла +)\\ +▁Dog +▁developers +▁nod +▁сторо +▁NaN +▁priest +▁exha +UND +pair +alone +▁moon +▁#!/ +▁guns +rola +чита +▁Encyclopedia +atis +▁'" +zych +▁superfic +▁эк +едера +feed +LAY +Fi +unks +isecond +▁'@ +▁Adding +рое +▁tang +цо +hung +bis +ského +▁advert +▁занима +uzz +ágina +▁Tel +sig +▁Ez +▁guarantee +▁teaching +oty +termin +▁distributions +FLA +▁Giuseppe +querySelector +▁/\ +▁Squad +gz +delay +▁surrounding +▁manus +▁Hou +², +▁cultiv +▁troubles +▁raison +expand +▁cov +nungen +)){ +▁geen +▁außer +▁Лі +ři +▁situations +▁telep +▁Jed +▁travail +lias +bullet +▁selecting +avier +▁essential +(/ +yyyy +ště +ulty +▁kra +▁tabs 
+▁experienced +azi +▁Directory +▁cron +▁spend +▁RA +▁selenium +▁Thé +Elements +cii +▁plat +▁archive +▁assistance +▁neck +▁Avenue +▁wheel +▁hade +Common +▁Dialog +▁forg +▁surely +▁hockey +któ +▁tk +▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁ +▁Bruce +▁enorm +,’ +▁Christopher +jev +▁quad +▁AJAX +▁relief +▁modes +sklär +▁Vid +▁Serial +▁tokens +▁Poland +\] +▁vide +rooms +omas +▁Bureau +cx +ностью +▁signs +шение +lossen +▁Queens +▁membre +▁mez +▁Bool +▁Naj +▁Memory +▁Khan +▁là +▁Hud +▁dismiss +ighth +▁fs +prevent +▁меда +▁Police +▁ско +finite +▁ami +▁Much +owania +ORY +iors +▁Premio +▁textbox +dm +▁afin +▁Donald +▁Priv +▁decid +▁Maurice +agan +▁Britannica +▁oft +▁consecutive +"?> +овий +student +▁peque +▁dieses +▁retour +étr +▁сез +▁kre +▁votes +ruption +izada +▁Wiel +▁Gray +▁Leop +teilung +([' +▁whites +frica +animation +curl +lings +="$ +loyd +textsc +ору +▁села +esian +▁Mission +▁неза +▁ultimately +бов +olen +скому +nete +▁Dit +▁costru +dependent +▁Resource +▁hosts +▁rear +Duration +ників +Ма +▁planning +▁prediction +▁Lyn +▁kir +▁Legisl +мат +▁Soccer +▁survey +▁estadounidense +orgen +jourd +▁aprile +▁ids +ське +▁employee +▁Schauspieler +ръ +▁multimedia +▁свою +▁wine +▁EU +ică +▁Rhein +▁Palmar +oteca +▁prepare +▁Tot +▁Null +▁kin +inals +▁Newton +▁tbl +▁Sold +▁verf +aturing +▁laptop +▁Совет +secret +▁Olympic +▁footballer +▁Rudolf +▁conhe +zysk +▁evaluated +») +shop +repository +▁zach +▁losing +etter +▁Wirtschaft +так +▁unnecessary +▁Phot +anska +▁Native +CCE +▁fifty +▁erw +rh +issent +}{( +▁lanç +▁Xcode +город +cir +▁película +▁Oscar +▁shore +▁supplied +examples +Mess +VICE +▁exclude +▁hen +▁губер +▁Fragment +▁Bitte +▁Besides +▁hes +▁ihrem +▁Serge +▁artific +="${ +лово +uteur +taire +пас +▁easiest +▁famiglia +Normal +▁dalle +▁nations +rp +thead +▁області +▁Democratic +▁челове +мож +▁гер +▁smallest +▁Publishing +▁Ts +▁laughed +lle +▁Amt +▁IIS +FORM +Mag +дон +▁storia +▁organized +ční +▁ox +lingen +▁luego +cció +▁rely +▁tussen +erten +▁honour +▁Claude +▁Korea +▁Metropol +Super +rien +érature +attro 
+▁біль +▁Herbert +▁auteurs +▁darauf +▁mental +▁rang +▁són +▁Soph +)", +Descriptor +prepare +▁Landkreis +HC +cross +лиза +▁Login +onen +Feature +▁museum +vek +▁Nelson +▁rejo +▁команди +▁summar +▁следу +ämp +▁Gas +вом +VALUE +inge +period +lassen +ával +▁altogether +umph +istro +ąż +▁Keep +▁Marco +▁étant +▁Dre +geometry +▁Kas +messages +Cook +▁Side +▁коми +стри +▁excess +▁Biografia +XXXX +▁Nie +vendor +xsd +Mill +processing +▁Missouri +▁permett +▁apar +▁crowd +fert +▁Dou +rí +▁CC +▁payment +▁Hollywood +▁Virtual +▁spoken +▁tram +▁Community +▁administrative +▁воло +gior +visor +▁Украи +stage +▁Format +▁convenient +На +▁median +▁вра +▁Према +enig +▁Opera +rés +▁fmt +▁efficiency +male +Master +Series +▁syd +generic +interval +▁efect +▁inwoners +лимпи +irement +Err +öh +▁lying +▁Settings +!= +ematic +argv +▁Basic +▁consideration +▁habe +-% +▁mountains +▁peak +▁fallen +eded +logic +▁matched +▁typing +)}, +▁fancy +▁elegant +ال +▁участ +▁Sarah +▁Verd +▁tego +rules +▁mounted +▁ім +еру +stoff +fahren +distance +▁License +▁LEFT +▁wp +/{ +▁amazon +>& +▁első +quarters +▁shock +nick +▁Archite +▁Square +▁rates +iore +▁Nat +▁Charlot +reichen +▁variation +osis +life +slide +abi +uki +mysq +▁primitive +▁universitaire +LENG +ależ +ebook +syn +▁Gegen +▁Kü +▁але +▁Lub +concurrent +izzato +▁stub +▁ie +▁'./ +cod +▁internacional +▁Glas +▁mare +▁Neb +▁GB +kwargs +▁aument +WID +▁род +punkt +▁Grad +SN +AMP +▁Born +▁Guerre +готов +▁medio +Med +supp +actual +dropdown +▁oktober +▁ř +▁circular +▁skin +▁emphas +▁голов +▁pue +▁informations +▁Wolfgang +▁useless +ит +▁Joan +▁бор +▁Glad +▁Know +ként +speed +▁Kevin +unft +▁arqu +▁Casa +(... 
+▁rapidly +▁proble +▁Википеди +žen +▁Neben +▁Meter +Children +cem +igos +aju +▁Retrie +▁Hell +▁gig +▁controvers +▁zoom +▁cens +▁alcuni +▁Header +Meta +Required +▁институ +▁skup +▁ingles +égl +bij +▁tér +▁compag +▁committed +▁processed +Lower +▁Foreign +▁seq +sheets +▁Fem +hoz +inks +▁kall +variant +▁libro +▁clicks +▁gobierno +iegel +мого +geme +▁tower +▁parish +▁TCP +▁ls +▁nginx +NaN +▁Dir +▁Begriffe +arie +ímp +icios +▁sharing +▁cinéma +bec +RED +▁Kra +abol +▁flux +▁expensive +▁суще +▁`_ +ocz +лист +▁acquaint +▁wise +▁pouvoir +▁devant +▁momentum +immer +▁Coupe +indexOf +▁doesnt +▁зав +▁license +▁â +CSS +▁rice +Team +▁ano +lit +▁merged +▁Cell +лл +boy +asts +▁sell +▁große +▁virtuel +Cancel +▁sj +gment +.< +чай +ië +akh +izers +prit +▁Tib +▁elaborate +▁fé +▁меди +LENGTH +▁primarily +▁scores +▁carrying +▁lake +compose +▁Township +unge +▁alberga +anych +quelle +▁Ark +▁pris +▁voll +шли +Validation +▁ceux +▁populate +" +▁femmes +ANG +▁Despite +вые +iske +zug +нача +▁hatten +INSERT +Employee +▁moments +▁última +▁holder +blank +Collections +athers +▁grade +▁affairs +.$$ +▁delta +▁Jugend +▁español +▁OUT +▁mathematical +▁mongo +▁Фе +uling +▁revolution +▁coin +▁subclass +"=> +äche +▁pyg +щая +illery +▁comenz +depth +▁cél +▁resize +▁Same +▁strik +▁tir +▁scarc +▁Member +subscribe +óż +útbol +except +▁driving +kie +zony +èmes +David +issant +▁ты +▁élect +▁rename +▁Running +▁interfaces +//////////////// +▁Walker +▁société +▁asks +brid +▁jewe +▁seines +▁agents +▁MY +▁Lawrence +dess +iesen +▁людях +прави +▁ancest +▁welche +raum +▁orb +scal +▁Lear +▁wear +▁slave +▁renamed +čen +maste +angles +▁América +▁ti +▁demsel +▁beneath +binary +▁edición +▁kilomet +uits +▁cuatro +▁entrance +ondissement +▁bag +▁Armen +ijo +▁Lors +▁demselben +êm +▁discrete +▁prominent +▁Jay +decor +DL +▁dí +Struct +▁Production +they +arius +schnitt +▁Cou +▁lex +youtube +▁работа +station +sep +▁mirror +▁hits +▁Beck +atically +▁Laz +▁winner +DEX +▁INT +}^{- +▁wegen +mad +Angle +zing +▁Bayern +sal +äger +▁busy 
+▁stör +▁folk +▁prix +▁allocated +▁pt +affen +cluster +▁complement +árs +▁Amerika +рій +▁valley +▁rooms +▁moi +.", +;;;; +▁lowest +nog +▁landet +▁programme +chio +▁Während +ández +▁долж +▁ouv +omány +▁Википедии +▁só +▁elektr +Desc +▁Beaut +нар +▁може +Pierre +esota +▁operated +▁forte +рис +▁opposition +alia +▁Syl +getName +вели +fik +▁comprom +▁TextView +Spring +metadata +engu +/, +▁carri +istol +▁diagonal +lista +izen +▁rende +gcc +beck +lius +iral +Resolver +▁percentage +▁attra +strings +wiąz +ods +волю +ęż +▁newspaper +imiter +ABC +▁Manchester +[{ +Agent +▁Wor +▁Kath +▁пові +▁entonces +▁niveau +atted +learn +atiques +▁уби +▁quindi +binding +▁imported +▁Horn +emberg +complex +▁neural +information +▁recognition +ingt +▁inhabitants +vue +▁Bevölker +▁curves +▁leb +дій +▁sow +▁sentiment +PH +rache +▁-( +▁estable +▁Ferdinand +▁écrit +▁primeiro +▁tex +▁intermediate +verage +ibus +▁serves +ivas +▁bru +▁lum +attice +чный +▁Dres +▁videos +duration +▁abit +▁egg +ographical +alph +STATE +▁пара +reading +▁vehicle +▁fortune +ultats +▁Storia +midt +łącz +▁Memorial +▁vas +▁зан +▁utility +▁obsc +▁relacion +▁runat +Release +take +▁Oliver +▁Sid +ulos +▁Garc +▁розта +▁Sak +Py +führt +▁trabal +*{ +▁zes +▁szere +▁varios +▁otra +▁eval +▁situé +▁wounded +▁Vincent +▁викори +▁encode +Modal +▁forb +▁dynamics +▁depos +arde +▁streets +▁Komm +=$( +▁повер +▁dois +▁vitt +▁automatisch +▁reload +▁Verwalt +bero +▁hub +▁mos +▁tutto +▁Frederick +łow +antages +aque +paper +▁einige +`), +dj +▁Ple +▁%, +▁Bitmap +▁friendly +▁truly +▁stroke +roph +▁engl +▁coff +▁dust +▁Jahres +ppi +▁wys +factor +schluss +▁деревня +▁Past +▁дома +COM +▁pueden +▁gift +▁Gla +▁triggered +ély +ülés +▁Oliv +▁verso +▁lle +▁Gli +▁Ltd +oa +▁territorio +ordre +▁deck +dra +aszt +▁concerning +▁Additionally +▁které +▁grund +▁Gest +▁misunder +pret +──── +▁reputation +zia +▁успе +▁escaped +▁Prag +perform +▁austral +▁Vater +час +▁races +▁Byte +Mask +▁Territ +стю +▁Voci +▁Fichier +▁Населення +▁Unterscheidung +teenth +▁pilot +▁ji +▁двух 
+▁orientation +indre +▁Dort +ças +пли +▁reaction +▁consisting +▁ferro +тисти +yard +▁сві +▁interpretation +ią +rah +▁fand +Public +▁universe +▁retir +▁conscious +arqu +▁waste +▁Bib +yclerView +▁listening +gleich +niejs +▁correlation +▁receiver +▁уда +▁courage +uchs +fass +▁chunk +▁Anfang +▁großen +continue +▁Warszawa +hé +iy +ivement +▁α +▁exposed +▁zahl +▁sacr +▁Looks +▁eager +enten +Cursor +/_ +ixa +рела +знача +▁фамилией +▁argent +▁Anders +œuvre +▁Isa +мента +▁advers +riction +GP +▁після +▁preserve +▁Garden +Rate +après +▁readable +indu +▁skill +▁helping +ographique +cling +ologist +▁Filter +▁finger +▁Vall +▁Polish +lg +▁Familien +▁waters +▁pseud +aza +_) +ARY +▁среди +▁Must +▁Bod +anon +▁lado +▁tight +imen +appen +frames +ingers +▁COVID +▁зі +▁све +▁ць +▁Left +]]; +чь +фика +▁сло +▁пі +▁existe +▁Atlantic +▁maintained +▁irre +▁année +▁commented +веро +berta +▁Lad +▁Upon +▁pause +mill +opter +UK +рес +нциклопеди +▁alongside +▁robot +▁fert +▁moy +▁ade +Mapper +)-> +igua +étique +тка +alias +▁ори +▁Magn +▁gehörte +imb +)}{\ +▁Wikipédia +▁urs +▁ende +leb +▁GC +Hol +ancing +Union +▁tenía +TT +▁estate +há +▁полі +ultan +▁Hockey +ulse +▁choices +scher +▁[], +▁potentially +▁Übers +▁admit +Comment +стя +▁Vien +▁ці +▁permut +cgi +▁crít +Console +ctic +▁okres +awk +football +ouest +CTYPE +ologique +▁constit +▁interests +▁Progress +▁Menu +▁také +▁Asian +▁защи +▁younger +▁wished +▁Sort +▁audience +amba +▁gehört +▁Kansas +yaume +▁Professional +âce +▁fatto +tod +▁datasets +▁fare +▁waves +~/ +▁measurement +▁wol +indust +▁struggling +▁pulled +▁caratter +▁Externe +▁действи +cnt +liches +▁Possible +▁faced +▁hypothesis +▁kilom +▁när +boolean +PY +ampa +▁kiss +▁astero +▁negli +aments +▁Stu +ató +▁Constitution +▁interpol +▁Unable +▁pis +▁parc +"]) +pler +▁autory +▁algunos +ywna +})) +▁falls +▁équip +▁emit +▁profil +gets +фо +▁Military +▁nombreux +oct +Replace +▁seasons +▁château +▁typeof +polit +▁rand +▁quar +▁erstmals +сини +▁payload +По +кін +repo +▁Pav +Score +erves +▁sollte +▁між 
+ébec +▁clip +▁Nice +▁neben +▁assass +itories +▁unity +▁ен +▁Institut +▁internationale +▁наук +▁comand +▁kleine +▁adjacent +▁delivered +▁ше +зем +▁cot +visual +вает +▁Census +\_ +▁territory +чил +чные +flutter +DidLoad +Documents +▁dob +Bre +animate +▁biz +▁bata +▁SU +eso +▁priority +ván +iras +▁charged +▁Micro +atoire +чер +abad +uru +▁vš +dire +▁Twitter +▁мето +).. +▁Цент +▁entwick +▁Mind +▁функ +Future +lst +łoż +fli +tensor +▁topology +▁arte +ERT +▁variance +Images +▁(@ +ArrayList +OC +▁Демо +aucoup +▁denotes +imon +њи +▁Przyp +▁Zag +▁дире +▁Similarly +бро +▁militaire +▁тому +▁Johnny +▁Мексику +ћа +Supp +▁junior +oltre +▁Моск +▁admitted +▁religios +зяй +его +▁tears +ingo +odu +iveness +▁logo +▁último +▁aliment +▁UITableView +)! +▁nj +lette +▁resident +▁termine +▁уже +▁Сте +office +▁carte +▁livre +▁Москов +▁elections +зиден +Trigger +▁Benjamin +addClass +ског +▁Observable +Cla +gemein +▁consent +ври +▁unfold +▁governor +нал +▁toda +Remote +arias +▁instal +fixed +▁decay +▁дерев +xyz +▁DATE +imar +ntil +▁startup +alion +▁kolej +cios +▁ranges +▁stupid +▁implementations +▁rm +ének +▁gcc +▁scène +Navigation +▁  +▁кан +▁towns +Username +▁фе +▁leaders +oit +wär +▁dummy +▁assistant +{$\ +бір +▁roy +▁Layout +▁Jung +Lines +▁Holland +пор +▁Гри +▁Bened +▁Под +xls +▁Gol +▁Aleks +▁ejemplo +▁sezon +arding +footnote +▁Congrès +refer +ската +Iterator +▁ourselves +▁Mic +▁código +▁площа +▁\$ +▁Charlie +Nodes +▁puzz +▁Identifier +▁flutter +▁prü +▁ort +▁Cort +asticsearch +▁Свя +▁Bull +udem +▁apparent +:-- +▁Хар +▁Lap +▁comport +matically +▁curios +▁может +▁Bh +apping +▁basketball +zetek +▁runt +▁Milan +fection +ría +▁Kin +▁slower +both +▁Instituto +▁Historical +▁również +matches +yci +▁espèce +▁Schweizer +NT +SF +acia +forge +Points +numbers +▁falling +▁inheritance +▁Erst +▁customers +▁actu +▁migration +\' +Plan +Mr +othy +▁upgrad +бира +▁Offic +▁Wait +▁toler +ardon +▁slide +)_ +▁став +▁nuclear +▁Bil +owner +▁Harris +Information +▁pó +▁включа +▁nuovo +▁Cav +▁Descri +▁ак +ództ 
+▁reactjs +▁Adams +▁Alternatively +струк +)`, +substring +▁massive +▁heavily +▁сезо +▁Ana +▁vale +Pad +▁Either +▁rs +anche +▁uploaded +▁(/ +▁спор +▁reduction +▁Tokyo +gren +▁migli +▁iterator +stav +▁supporting +▁österreich +▁NSLog +istiques +rimin +MODE +}}}\ +▁explos +оте +▁(„ +Sal +▁simplest +▁già +▁тан +▁cyl +bir +▁measurements +Created +erek +lookup +wirtschaft +▁Воло +timer +derr +▁стала +▁scenes +▁persu +liest +▁schedule +tal +лено +▁painting +▁improvement +software +▁governo +▁Hir +Execution +▁Okay +Prop +loster +ніципалі +▁peuvent +olu +▁Фа +rollo +▁коло +▁carrière +▁toggle +▁($\ +▁aggregate +▁Бі +textarea +Ok +itto +▁stim +▁recursion +▁Federation +)_{ +ategor +▁distribu +Cloud +▁madre +▁iv +▁Lieutenant +▁substant +▁leaf +▁Kontrola +VA +▁tomb +эн +atoes +▁godine +▁#> +Cert +▁empresa +Props +▁planned +▁randomly +jähr +elem +▁Operation +*` +protocol +())); +wel +▁praw +▁сим +▁wob +▁hace +▁nearest +disable +▁Commun +▁revel +Free +▁brackets +IOException +▁alto +▁marry +▁auc +),\ +▁typo +edad +ará +icator +tatywna +▁buff +orders +▁asynchronous +▁econ +▁feu +▁Iron +▁rising +Radius +clk +▁zweiten +`' +▁uniqu +▁FM +▁Bran +▁flu +▁sensitive +urre +▁Iter +▁Sein +▁diferentes +▁него +chia +▁Anleitung +aturday +▁shorter +▁translated +▁Rés +▁rode +drag +▁lange +Bi +üb +leur +▁ordering +alous +▁Кор +archar +destroy +ervation +]], +AccessorImpl +▁autorytatywna +Sequence +▁proyect +▁bran +▁(+ +▁Kab +▁zem +▁Calcul +▁seul +▁Niger +▁chiam +throw +▁Planet +bildung +▁zones +transition +лений +▁mapped +onaut +Pair +ilian +▁Morgan +▁unto +jou +▁hid +▁Meta +▁elles +Lou +rama +geordnet +▁scarcely +▁mint +Focus +▁Alter +▁dio +▁ampl +ièrement +▁исследова +LED +algorithm +▁сайті +▁"") +History +pk +▁Whit +▁систем +▁Kirchen +rà +APP +▁<% +antine +▁Disk +conv +welt +▁Fut +▁Nom +ordo +ellij +▁receives +cow +ytu +▁obras +▁purchase +▁earned +▁accessed +axi +▁Mans +ivan +▁tuvo +▁Trace +rimonio +▁desenvol +érique +▁resulted +▁computing +▁inspired +▁Prize +*" +Comput +▁extensive +èg +▁Portály 
+▁castle +▁*. +▁photos +▁voet +ONG +▁Alle +▁threaten +stüt +▁albums +▁dense +flat +continu +Subject +▁readonly +Opt +писко +▁Aber +▁Position +▁Today +▁mini +▁Bef +listen +ственного +SUB +ossa +▁Pope +▁Jimmy +▁Дру +ungsseite +▁tren +optim +itsch +▁samt +▁испол +&= +▁Przypisy +▁продол +Cr +ermann +▁матери +▁Hugo +▁Deze +TRUE +▁defeat +▁watched +▁Gent +AUT +orous +▁опреде +orientation +▁distinguished +▁mesmo +▁sli +мена +mittel +gericht +eton +->{ +▁wont +▁weg +▁classific +ilus +▁MD +tasks +▁chim +await +▁gang +▁wię +through +▁Russell +▁guessing +▁акт +блі +categories +сут +▁Fen +▁муж +▁newer +▁Async +▁terme +>/ +пара +▁Trust +▁Opt +▁dah +▁wonderful +adratkil +▁Гра +mapping +▁discovery +▁BE +Enable +▁Friend +сня +▁controlled +чної +▁contributions +jší +▁Lev +▁francés +▁mic +zik +▁alem +cancel +!' +▁grat +▁Begriffsklär +Camera +ificación +ród +▁Arnold +▁bezeichneter +▁fought +▁deput +▁Drop +tax +dg +▁Hop +GN +▁Kirch +▁Бар +Invoke +▁erhalten +▁veel +▁wordpress +▁INNER +transaction +▁déjà +Fact +▁надмор +▁angularjs +▁át +▁alap +▁Price +▁effet +▁sphere +ClassLoader +▁rugby +▁kingdom +▁Mut +▁кино +▁reward +cit +▁presente +Sto +Character +logs +▁centrale +▁mouv +▁okay +▁aplic +More +ények +▁Köln +nett +▁истории +▁describing +▁soldier +▁Need +Light +▁"\< +▁hav +ermo +▁inferior +lea +▁gg +▁конце +fragment +sb +Country +▁vě +▁Beng +▁Это +▁водо +мар +STRING +▁új +multiple +statement +▁involves +▁tecn +Student +gré +▁lean +▁bringing +▁Medical +▁програм +▁Vog +▁жов +▁Spirit +nth +▁standards +▁Profile +▁ez +▁территории +▁stem +uil +▁Og +Btn +nal +▁nearby +▁producing +criv +▁assumptions +▁Spark +▁Lot +itudes +afka +five +atio +▁distinguish +rock +église +▁rappres +>\< +лій +▁мини +▁intitulé +}}(\ +▁Rout +▁Border +▁overrid +HOST +ritten +say +▁Чи +ichtung +▁straightforward +obb +▁Terra +▁[: +Ben +▁composite +)+\ +▁crown +direction +▁несколько +▁avail +▁purchased +hook +eties +▁fase +▁Rum +▁genom +▁dét +ową +mpeg +▁Ін +desktop +▁injection +agle +▁Edd +_{( +▁Hem +utos +proj 
+▁superficie +Plot +▁Docker +ätz +kreich +▁unclear +▁Unity +▁streams +вид +▁simplified +Fill +▁sant +▁Kommun +▁duc +▁две +▁obs +žit +▁Janeiro +бя +▁presso +▁Ministry +▁burst +▁reaching +liter +▁responses +▁Eug +▁sod +▁Cord +▁Perm +parts +цима +variables +▁forgotten +Fern +ostęp +vl +▁См +kim +ając +наль +гле +helper +dup +euw +fra +ellite +anya +▁reign +gesamt +седа +▁Ryan +▁formatted +▁Borg +walk +▁ал +agnostics +▁Cape +▁Franco +▁fug +:) +юз +Fetch +▁roughly +▁Mis +uetooth +▁Venezuela +▁astronom +")` +ombres +▁которой +óp +owed +HR +▁Camer +кие +parison +▁Bij +templates +environment +ização +▁ér +▁plenty +▁TypeError +▁forty +коном +▁Sed +▁thats +▁gravity +▁spiritual +▁duplicates +▁encryption +▁reven +getInstance +ällor +disk +▁thro +▁Nak +▁poł +▁heraus +invalid +sBy +Boot +▁bucket +▁Parse +hex +Conne +▁Computer +zyk +▁induced +▁Bruno +▁addressed +mania +▁inclus +ounced +scriptsize +▁Epis +▁vocal +▁Jonathan +ум +staden +▁Children +пей +Italia +reibung +▁nost +▁ещё +▁Werke +▁actress +▁Minnesota +rike +▁tek +▁primeira +▁frat +▁Configuration +▁bid +trigger +Contents +▁constantly +!!! +▁dread +▁hundreds +istische +▁cardinal +TABLE +▁estos +assoc +gray +▁Schloss +▁sche +cong +▁koji +ètes +▁Era +omi +▁SR +▁wrapped +▁trunc +▁ah +egos +oki +mouth +logging +▁fasc +▁Sample +▁conte +▁villa +comments +▁batal +▁García +▁Norte +▁wechsel +▁Museo +▁enfants +▁whisper +nake +▁jednak +lês +enders +▁äl +▁VB +▁cookies +zeti +atum +▁dedu +▁arranged +laz +▁cuenta +yml +▁flav +MR +emet +біль +cmp +ituto +zett +▁envi +▁kot +$: +upper +▁Alberto +kb +Anal +ört +▁[- +▁führte +iah +▁Tun +▁искус +uwe +ispecies +Pub +Sync +▁Colombia +akers +▁Imperial +oving +▁intelligence +▁equipment +ein +dagger +▁Edge +▁Республи +adratkilometer +▁Anto +▁charges +▁Ocean +▁simplify +▁miesz +running +▁Lac +genommen +▁representative +=. 
+▁Pred +▁spite +ciale +▁nave +▁extens +▁neutral +▁которая +.:: +шёл +▁principales +▁цар +▁tied +▁alta +▁Cit +lined +major +▁punk +▁cinco +ický +▁raggi +typen +тельство +▁conference +▁сіль +▁heut +iš +ета +velope +hbox +nown +▁zar +ktiv +ieß +▁стре +▁EventArgs +▁Ira +▁VBA +▁Santo +▁Fach +▁FF +▁Raymond +мец +implementation +▁brothers +▁côté +▁controllers +▁Cle +▁cable +▁confer +▁{- +▁czł +▁Filip +atorio +▁wicht +▁beaucoup +▁Lit +▁sessions +▁Success +▁routing +niu +▁Vice +▁krit +updated +▁Invalid +▁Mannschaft +▁aos +▁tudi +▁després +qua +Contains +Company +▁persona +adapter +сни +▁voj +▁escri +agt +▁ство +▁distrito +apan +▁aspects +▁zal +)^{\ +▁système +▁ана +iums +▁premiers +▁поэ +▁mère +▁Gun +aping +▁Rain +▁igual +▁processor +')` +bling +▁mism +bráz +▁closest +▁Reading +▁попу +cono +▁kult +▁!! +▁Expression +▁induction +ahren +▁cp +▁violence +ientí +cente +▁Dob +jack +song +bucket +▁deport +кими +lm +▁innoc +Changes +▁prohib +angol +iseconds +▁пор +▁hip +▁pů +endorf +▁scheduled +▁Flug +acyj +▁Films +athedral +Power +ardin +kap +icken +resize +eus +rr +лян +▁Hav +▁ora +FROM +лося +▁terug +▁Width +▁accepts +бен +▁mich +▁Czech +▁Bedeut +▁вид +ôme +▁Loop +spect +ük +eston +▁slot +▁została +▁Charlotte +▁составляет +▁Promise +▁epo +▁diction +▁Franklin +▁Riv +руг +cida +▁Explorer +cookie +▁formerly +▁municipality +▁Stefan +lists +COMP +Len +▁Staat +▁NBA +dens +▁oscill +!. 
+▁PO +ône +eses +▁националь +voor +▁копи +▁пози +ulu +Constraint +▁своей +▁algebraic +чня +Dict +▁appearing +▁prav +▁Universal +Browser +▁Singap +ennessee +]_ +▁Sof +▁Cad +ounce +▁costs +]{\ +../../ +ській +ühl +iety +пр +▁interpreted +ajn +colog +YS +mans +▁metrics +▁registr +istance +▁Поль +▁anonymous +▁institutions +▁zdob +prüng +▁арти +▁estat +acci +▁academic +▁chiesa +▁Gian +contrib +umed +▁Gir +▁baseball +numeric +Generator +GM +▁tiny +▁distinction +гер +▁rust +▁FIFA +▁Properties +^- +▁экс +▁Stanis +▁Ajax +escape +▁consp +▁Chen +▁Naval +Bit +▁bât +скими +drive +▁Round +photo +▁Level +▁geg +Tom +▁Mobile +▁Trop +Direction +isan +)^{- +▁Setting +▁Probably +лья +▁assets +▁atte +▁bulk +ést +▁wing +nius +▁wins +▁lud +ushing +▁deven +ограф +burger +▁embar +FilterChain +▁tum +▁öss +▁nommé +▁pir +▁luc +dbo +agues +▁alcan +ouwen +▁Stanley +циали +▁grown +▁preserved +▁solar +▁Население +▁performances +▁Cow +▁engineering +▁scaling +atomic +endance +▁ace +ängen +Anim +phase +zburg +Old +▁servant +▁gemeins +▁Observ +translate +▁covering +▁están +▁problema +▁установ +▁llev +▁czerw +éal +mez +REE +ERR +тури +segu +▁profit +▁multiplication +kommen +▁faut +▁candidates +▁Uri +▁Laura +▁sap +▁висини +▁Between +fade +▁reserved +▁involving +▁Mare +▁Container +▁назна +▁DEBUG +▁hurt +▁Polski +▁lux +CB +wach +▁период +▁Catherine +▁ganz +uchte +▁consumer +▁crossed +ordered +away +techn +▁subscri +▁shortcut +▁производ +▁simultaneously +▁rating +▁Kings +▁relationships +▁Sex +▁Tool +agh +acters +logger +homme +engers +▁Ri +earance +▁appearances +Real +▁passe +iclopedia +чко +terre +▁Ontario +▁переда +footer +archivi +ifiz +▁Protest +▁LIN +unnable +▁centuries +▁Bayer +цію +овин +▁Andrea +selection +▁calm +▁modification +▁shortly +inaire +▁fusion +▁feelings +PK +▁Roberto +гне +Shared +▁mehrere +▁Niem +omp +Env +▁Article +▁Pok +▁VARCHAR +▁dil +▁afford +▁confront +owanie +▁ministre +adesh +▁Poly +▁Распо +▁Gruppe +▁Helen +▁cc +▁portrait +bew +▁beta +▁Wir +▁Audio +▁(\< +riority +▁nit 
+▁представи +▁Vie +▁wür +▁Hold +▁Sad +▁Tochter +▁oltre +▁Activ +▁Jason +▁wieku +▁regards +▁taste +agnostic +лася +▁Self +▁apr +▁Deep +scop +Activ +▁typedef +ContentView +compiler +▁Roth +xc +зик +▁largo +▁Rena +heiten +▁platforms +ulla +▁glance +▁mascul +▁mex +▁Jorge +▁funcion +choose +▁reviews +▁Alban +▁Glo +▁Species +▁Fame +▁Roll +▁Puerto +▁\) +ymnas +environ +▁iphone +▁Wrestling +ały +▁Indiana +Radio +VS +▁independence +тай +▁decode +White +▁journ +ículo +▁Barb +▁Evangel +▁Andy +▁Welcome +▁Device +gef +▁remembered +▁variations +▁Adolf +itaine +▁надморској +▁steam +▁concerns +▁`| +▁био +тельства +▁quattro +extend +▁trabajo +enberg +▁scenarios +ânt +▁kommt +▁domestic +▁Basketball +▁Cooper +sock +держа +={\ +▁inici +▁Phill +▁генерал +archiviato +ън +Rob +▁tong +▁characteristics +▁amaz +▁Mode +▁inaugur +wehr +rant +ionali +▁Mother +Ma +équ +▁Kelly +cile +▁besteht +▁estimates +ruguay +▁Ans +Mad +▁нав +▁données +▁tropical +▁Several +elter +▁Pho +kem +▁Customer +▁складі +▁courses +Platform +navbar +learning +▁Swedish +▁zast +▁Lig +management +▁lod +uffle +Texture +arga +átum +▁DDR +нії +▁Société +▁domains +▁permitted +▁externe +▁quelque +vt +yman +▁Ward +▁agli +▁andra +Snapshot +▁må +▁yeah +дена +ępu +askell +▁République +inject +▁'; +änn +▁zelf +▁Entwicklung +ária +onomy +▁svil +iese +▁conser +▁nim +▁rész +▁Итали +▁partici +▁Lion +sr +always +▁Владимир +ческие +[, +▁Definition +nant +oem +Ids +▁вне +▁[...] 
+▁направ +▁GO +▁års +▁után +▁outros +▁región +▁Mong +▁filme +▁triple +▁spons +Develop +▁outcome +▁Bible +▁имени +Canvas +пута +curr +ások +){\ +ningar +`; +▁Flash +:# +must +cpu +▁formats +Har +▁episodio +▁Rosa +▁dès +emit +riteria +Annotation +Flag +gmail +▁Normal +ollary +▁foss +▁concurrent +▁crashes +▁виде +▁Minor +▁Sit +▁SN +▁scar +▁femin +▁specification +soap +▁operate +▁principalmente +▁aust +ibile +itime +лежа +iframe +▁concepts +▁tack +▁viss +▁carbon +tery +▁naming +▁Orts +idente +▁Capit +▁expr +▁насељу +▁Selected +▁hinter +▁iframe +▁zb +indexPath +coll +▁wrześ +▁acht +▁gradually +▁чу +зей +haft +▁tran +▁laquelle +ytics +IDE +▁pygame +▁Package +▁className +Bal +perl +тина +Occ +▁infrastr +▁Champions +▁classic +▁Raw +▁partially +▁Ted +▁stolet +rained +WHERE +▁vall +▁Julia +zat +▁surrounded +SEE +▁walking +Bad +FOR +contre +▁Palest +ático +▁engineer +▁partners +▁Jews +ilers +▁cerem +▁interactions +acu +sty +▁Princess +sharp +▁Singles +▁їх +chez +Receiver +▁patients +stringify +▁competed +bey +$; +▁Bd +hadoop +▁División +öld +▁restricted +▁commander +▁Highway +▁Česk +▁myth +чан +raham +▁enqu +▁pog +▁comuna +▁println +▁круп +▁depois +▁seats +▁neighb +циона +agine +▁clothes +▁Prior +Brain +FFFF +':' +features +▁filesystem +▁singles +▁Melbourne +▁destruction +▁Lyon +▁Insel +Nav +▁Replace +▁lé +Who +▁Estad +▁dimensional +▁öff +▁grands +джа +plane +ності +▁Origin +WI +änner +▁Cry +ITION +▁född +▁cultura +▁Rank +▁vuel +▁zag +▁Maxim +ону +())) +Raw +kirche +▁además +▁tie +▁Style +сков +istant +olph +▁Zür +▁Info +DOM +usc +nahm +▁Федера +▁Fot +▁specifying +▁titolo +▁Boys +iech +Place +▁Hoff +▁cached +валь +isher +rolling +opens +▁hr +------ +▁maggior +▁transactions +▁criminal +▁retre +▁Campbell +)): +▁ned +Pager +▁Hero +(__ +▁uncle +▁reaches +arto +▁hello +Preferences +▁затем +Named +▁readers +хі +kern +▁упо +кин +▁lav +▁nob +▁secre +▁ListView +вания +▁Mayor +borough +▁filosof +нення +фри +▁patr +FM +▁acid +▁Salvador +▁abb +▁Graham +policy +negative +ńskiego +▁Heimat 
+▁dazu +▁mely +▁ride +▁duties +overy +▁Proposition +▁Paolo +/' +▁Mau +imenti +Saint +father +▁equilib +phony +▁clas +▁отли +▁Buffered +rek +▁mitt +▁Hur +▁Harvard +▁demonstrate +uario +▁dolor +▁rejected +▁Müller +▁nac +▁Belle +▁gathered +nr +frika +öll +▁chemical +nig +▁calc +▁DEFAULT +▁philosophy +▁Laravel +▁alignment +EV +eor +▁dzie +▁mest +▁Io +CRE +зви +▁Medic +▁nä +▁zab +▁Slov +utlich +▁amplit +▁Frankreich +▁кіль +IND +execution +▁Karriere +dostęp +▁réal +engo +▁severe +зма +▁турни +▁Carter +▁Robinson +getElementsBy +▁prototype +▁japon +führung +▁consegu +▁studi +▁lire +▁schließ +▁Buff +▁redund +▁ern +▁myster +▁proprio +ateful +▁Parent +▁ladies +rack +тика +enburg +▁качестве +▁EF +▁stam +▁nueva +▁filtered +reten +▁Ian +▁Matthew +kih +▁ő +▁компози +▁forever +oires +:\\ +▁études +▁soup +▁pleased +)}( +▁Stop +Setter +▁Help +▁bars +▁ERR +▁(? +▁poetry +▁Util +AK +▁fick +▁IM +▁proud +носи +▁muerte +▁Palmarès +▁Nas +щих +▁quer +▁apenas +][' +▁Konst +пон +▁Schiff +▁mp +▁благо +fram +▁household +▁tract +encoding +▁undert +▁Aug +ован +▁Arten +▁invoked +▁dynast +▁fleet +чество +▁Murray +▁gut +elihood +▁SSH +ответ +▁personally +прия +▁financi +▁Thompson +alu +identity +▁Grab +addle +Ét +▁Tob +▁verlor +▁Sainte +▁dop +▁вере +___ +▁promotion +▁-= +▁отде +▁ambigu +ORDER +▁Communic +▁imply +oned +cluding +▁collision +▁fragments +scription +▁'{ +лях +▁hans +ус +wire +namespace +▁sword +refresh +▁kwam +zs +commons +▁cosa +▁regime +grep +▁dioc +▁Contact +▁estas +▁Stewart +▁viele +това +▁Ran +annes +iday +▁snapshot +orrow +▁zač +▁участие +▁promised +Assembly +▁championship +▁Define +▁eren +▁ново +▁thinks +Age +▁gev +varchar +ività +compos +▁Mutter +CONT +armée +agnet +▁Brow +.— +▁Television +▁Для +▁vm +▁ordin +▁Михай +▁aproxim +')-> +▁zoo +ippi +▁sino +▁Québec +rages +äck +eing +arlo +pios +▁Chan +▁elli +▁incons +gestellt +ppers +Jean +anstalt +▁Dance +▁toen +▁decis +▁Резу +▁officially +ätze +▁доро +▁enumer +▁troisième +typ +offs +боль +odn +▁Zar +▁друго +quia +▁Nicolas +пису +▁mob 
+paces +нього +Alg +éroï +Errors +▁гре +▁женщи +inch +▁Korean +▁Apost +▁Liver +▁elementary +▁DI +виси +▁soil +▁DLL +▁risp +▁Shakespe +▁Gaussian +▁Kurt +Vertex +ebol +organisation +ären +▁YES +CUR +▁началь +▁постро +▁Luigi +▁caching +preventDefault +amd +▁Vit +subst +▁строи +▁Campion +chr +фере +▁Список +NF +▁cím +▁hé +rebbe +ocy +below +▁bylo +▁Уи +▁\({\ +▁`: +giore +San +▁Gate +▁вс +▁olimp +▁Matrix +▁hearing +rii +tfrac +▁allemand +▁Vue +лн +▁compiling +▁Ens +▁investigation +▁Ax +▁chars +▁targets +▁loud +usement +▁Nether +commerce +IGHT +ocoa +ifecycle +▁Leo +priv +▁goods +adamente +Austral +▁reboot +Gest +▁representations +ceu +▁doctrine +cers +▁Krak +▁advoc +▁squadra +▁arbeitete +üst +▁pill +Answer +▁квіт +▁Wa +umann +▁Dynam +Famil +▁tennis +▁Engineering +▁circles +▁Maryland +▁besta +▁bases +▁znajdu +ктора +▁arrest +лер +▁Gia +▁remarkable +▁могу +▁Supreme +▁`% +dor +▁aujourd +▁wis +WIDTH +▁misma +▁fluid +▁petite +▁Tow +Registry +emed +▁Wisconsin +▁Racing +▁registration +/% +third +▁monuments +чей +▁jet +▁Urban +álva +▁milieu +▁possess +▁germ +dependencies +▁enemies +▁samen +▁Werner +▁hizo +▁td +▁yesterday +▁Ад +▁hasn +cellation +ování +lika +Week +▁Ing +▁Email +▁mètres +▁OCLC +▁amongst +▁splend +fur +antics +▁XXX +▁группы +lach +▁cousin +▁invariant +ђу +▁Beispiel +▁harder +▁bell +▁orch +tb +Footnote +regon +Martin +▁incon +▁attacked +_{- +▁Tras +party +iteit +▁saint +rások +▁containers +Mo +▁Sn +quantity +▁ras +▁Canal +ccion +uvo +▁idx +typename +▁Rugby +▁Seems +▁transmit +▁Präsident +зне +▁Baker +inth +▁több +verein +▁especie +,( +▁téc +▁WITH +▁unos +▁politics +createElement +▁stats +▁Tennessee +▁Bedeutung +▁Screen +▁Straße +anze +▁partly +manuel +olation +horizontal +érieure +ampio +▁струк +Weight +Land +poly +▁Dak +▁Assume +".$ +▁casi +▁gross +▁entertain +▁década +'.$ +encer +▁guaranteed +]$. 
+лися +▁acceptable +raise +irus +weit +▁Ана +▁hills +ipage +BIT +▁nucle +▁utilis +CAA +ènes +▁Schweiz +▁AA +ninger +▁bands +▁tender +som +Warning +▁Bischof +▁Arc +▁Woman +▁transmission +чни +istre +BY +▁SI +▁Пар +▁}). +▁presenta +▁René +▁happiness +▁Punk +cols +▁Desde +рёх +▁мона +▁scratch +▁tcp +êtes +itated +▁diferen +geh +nahmen +Пе +cki +▁Teatro +▁Remember +▁fright +▁Yam +western +leted +▁встре +▁település +зин +▁Quant +▁supre +ája +дія +▁carrera +kret +para +▁SUM +▁pit +źdz +éo +рення +▁Chor +▁voix +▁executive +▁allerdings +Maybe +▁день +▁flying +▁parliament +ждан +▁fram +▁жовт +▁ugly +▁буду +igny +\|_{ +▁bitter +sce +▁pole +Verlag +▁totalité +▁foundation +jt +▁slice +ifique +▁integrate +strij +▁asympt +▁ему +▁perturb +▁Flow +jboss +RIG +▁Aless +XXX +▁summ +sqlite +▁cheer +prob +▁GPU +ził +(*) +▁induct +RAY +blatt +questa +oru +▁Inside +▁McG +▁Nep +мп +▁inve +▁Animal +▁sob +ított +loyment +▁bund +Station +▁BEGIN +▁partiellement +igg +estore +▁coinc +▁Sommer +▁md +▁locked +mathchar +arma +pent +arium +▁ears +▁Songs +▁similarly +▁literally +▁inches +▁affection +lp +▁concluded +▁муніципалі +▁памя +estaur +▁Josh +▁Fritz +DBC +дён +posa +▁golden +▁pc +▁comte +▁Ziel +▁présente +marks +igneur +▁Drive +▁neglect +▁rozp +▁Five +spaces +▁Medi +▁existed +▁była +джи +▁frente +тник +odd +▁answering +bian +▁Eugen +▁Publications +▁Dia +lá +▁'_ +▁recuper +ому +▁Append +obar +▁employees +▁compens +emetery +▁элект +MON +olin +▁historic +his +ąd +nm +▁Goth +▁stress +▁partecip +▁Aw +▁sar +▁hu +▁matplotlib +▁Myst +();` +schein +Longrightarrow +▁ря +▁Isra +[^ +nou +▁synd +working +▁Nation +▁Pent +▁klass +▁applicable +▁Diam +▁brasile +▁pac +▁Height +Put +▁intro +▁unusual +nas +▁Gebäude +▁beam +▁Rect +▁Primera +▁haut +▁trait +prüft +inación +▁configurations +▁gilt +▁territoire +hez +▁alte +relative +Excel +▁Wright +GV +поли +Quant +▁gauge +▁multiply +ASS +ственно +ану +▁jeden +▁literary +▁Dro +▁advise +itzen +▁disag +website +▁дія +▁observer +▁január +vě +kup +▁Ses +▁wojew +▁stages 
+▁времени +łuż +нос +Download +ipo +▁graf +▁робо +▁Nikol +▁fic +▁joining +▁diversos +▁LIKE +▁Fitz +▁dimin +▁distrib +Sam +koz +▁alphabet +oser +OUR +uka +кая +▁steel +▁`-- +▁tener +marker +▁Heaven +newcommand +▁prisoners +▁Knight +▁presents +▁questi +▁trains +opera +▁Linear +▁ME +▁Buc +Leg +▁agua +▁Griff +olg +dst +. +▁persones +Mal +бере +folge +▁acab +ctu +ptic +▁Navigation +Russ +галь +▁Ful +▁має +чная +wner +contra +▁joueur +▁Jess +▁renew +▁lap +▁casting +gal +▁tématu +▁называ +зах +чне +)-\ +▁часто +}$- +▁licz +▁emot +harm +▁occasionally +▁horror +east +▁printer +aran +▁Mississ +follow +▁Barry +▁investigate +gow +▁Americans +Since +▁відо +▁reun +osci +▁Chapter +▁bay +роме +ethe +édie +comot +▁miejscowo +▁studierte +ouvert +▁кур +▁DESC +▁touched +▁Jerry +uese +лище +authentication +▁colle +heart +▁regiment +cribed +▁Боль +▁проис +ceae +▁masses +▁scrolling +usto +SW +ovat +▁grâce +▁Архив +▁Север +avait +▁Marshall +▁HashMap +acon +ücken +[]) +▁evangel +etzung +ttemberg +sters +TM +▁литера +quot +Pred +▁werk +▁haber +lava +vous +▁Late +cycle +тирова +▁проду +▁populations +▁Yan +Prefix +actéristiques ++' +()`]( +▁Ль +филь +▁жизни +ftp +▁всех +▁gdzie +▁videa +oauth +▁pid +ům +▁pesso +▁tracking +izin +▁Morris +щий +▁Provinz +▁Mitte +▁artificial +brázky +▁дости +▁restored +▁communicate +agit +Recogn +▁lon +▁заня +▁Argument +flush +мана +seconds +UC +▁Ruth +▁tub +▁Bret +▁Pere +▁responsibility +ńczy +▁environments +kee +▁groot +▁painted +▁Éditions +cpy +árt +lichkeit +arda +Batch +▁Leopold +reason +noreferrer +sens +▁rocks +▁Hitler +лат +▁quoted +▁колле +▁уров +bag +.") +▁ML +▁komt +▁[_ +▁spectral +edo +▁insieme +▁suffering +slider +▁Kennedy +olate +▁Patri +зии +OH +▁теа +▁права +мах +rewrite +▁Einsatz +external +holds +▁Places +atype +▁vulner +▁abandoned +Origin +▁maximal +AAAA +▁Baseball +▁Close +▁painter +▁assigning +NB +blast +▁Künstler +)]( +fach +▁Constantin +okes +▁nobody +▁subtract +▁fosse +▁certific +▁muse +/), +▁Profil +▁proxim +▁Jerusalem +▁simplicity +▁wsz 
+NUMBER +uttavia +UITableView +ichter +жан +▁Lav +itchen +▁Чем +Tu +▁geom +▁zvuky +▁Survey +ANCE +▁encrypted +prof +▁dare +▁Loren +тв +▁Алек +▁computers +▁expectation +▁substantial +▁Дми +▁`{ +▁дра +ubble +▁performs +▁Krieg +▁incoming +▁Classification +WebView +▁episodes +apper +äufig +▁giov +▁Depart +бора +edly +ospod +▁ptr +▁dátum +▁estimation +icole +▁---- +▁princes +HEAD +▁diffusion +▁drie +▁Ada +нице +nginx +shal +▁februari +▁Tat +looking +kund +▁Dean +mongodb +вших +▁Aur +▁Flora +▁Studios +ције +eil +Install +▁franch +▁HMS +▁practices +lej +dale +▁poste +▁Hels +▁reliable +ździer +▁verse +ermeister +▁quit +ético +ilis +edor +▁Cultural +дже +▁liked +▁mongodb +▁Broadway +▁IR +eszt +hov +▁míst +reiche +▁kB +стом +▁SQLite +▁torneo +\. +Ord +▁Administration +▁зда +▁Hinter +▁Via +Decimal +orious +▁nécessaire +wx +▁tej +▁tema +Obrázky +рите +▁builds +▁laten +▁гг +Visibility +läu +▁sechs +▁луч +cera +Could +▁traject +}}^{ +▁Japon +another +IK +▁belonging +▁facilities +▁Daily +▁dece +intro +▁случа +Namespace +▁Bak +locale +UG +=${ +▁compañ +jąc +▁arithmetic +forum +▁porta +onk +▁gender +▁expects +бка +▁nak +▁Grace +▁stro +ividual +▁COM +▁Farm +▁canton +тому +javax +сей +▁briefly +Face +rotate +constant +▁gallery +astro +allery +▁DJ +charge +ходить +Cent +\", +▁donna +arca +lade +zin +▁Ned +▁hosting +idor +itative +igs +▁пря +▁ticket +▁studying +▁designer +lapsed +▁laat +▁dix +▁integrated +▁informed +▁behave +▁labour +estellt +calendar +▁killing +▁twitter +iae +▁historique +DEFAULT +iała +▁theoretical +▁unders +ляет +atan +▁surname +▁intercept +гласно +▁општини +▁tired +▁Beth +▁административ +Li +▁Тур +▁Scanner +▁Stern +▁вместе +▁reporting +▁sull +цией +berts +ogonal +ők +▁ipsum +▁seulement +▁Seiten +wordpress +▁featuring +istischen +jub +▁étr +▁tea +▁adapted +▁scales +▁nan +getValue +▁Blues +acles +▁stati +▁entitled +▁Ralph +gravity +▁entrepr +któber +limat +lis +Demo +relation +▁nep +prowad +itis +▁pup +nehmer +▁disappoint +▁etwas +annon +▁approved +▁clever +Loading 
+▁verz +resse +▁inspir +▁sampling +▁Bek +})$. +▁грома +▁specie +▁repub +▁loader +▁erf +▁shoulder +rais +▁мате +▁Month +Scene +▁blocking +▁ocean +geben +▁Kilometer +▁bedeut +▁Mix +fmt +▁Norweg +▁IDs +parallel +▁anticip +▁revis +хан +▁свет +CASE +▁führt +▁atomic +▁darkness +▁Fußballspieler +▁Жи +quisition +▁Sieg +Circ +▁cientí +nelle +SHA +▁urb +▁ksi +leqslant +▁фрон +▁defect +▁rá +▁stronger +▁pł +▁communities +нина +enas +iennent +▁safely +▁тя +▁benchmark +▁Braun +methods +argument +vos +obox +рови +▁recherche +mn +▁brings +machine +CESS +hosts +▁NY +Autow +▁современ +▁Gary +▁sensor +▁documented +▁prendre +▁peer +enix +hai +arbe +цент +_( +▁URI +ева +▁Regie +▁Monument +▁onderwerp +Bag +tit +▁stir +▁nerv +сторія +▁sov +▁writers +▁sorts +absolute +▁difficulties +▁parlament +▁IEnumerable +▁dissol +▁CHECK +arina +inburgh +DM +▁eind +▁budget +▁certains +▁första +anja +▁годов +▁тек +▁Duch +gui +▁Teams +▁многи +Marie +Integr +ThreadPool +rust +ík +%" +enf +spl +▁begun +lou +▁RewriteRule +tuple +aneous +▁marine +attan +ikal +▁graduated +illé +▁прове +▁Роз +', +▁Pfarr +▁nivel +▁працю +music +▁setTimeout +ERS +▁Erik +pit +▁Хро +▁pił +▁peri +док +uszt +▁Bear +ClassName +▁Parlament +▁aix +▁invited +▁PATH +xter +▁Race +▁hecho +▁Tower +▁utf +actly +▁буде +▁angles +няя +ouvelles +▁climate +▁singing +▁navigate +>'; +adows +▁leta +▁Sitz +▁partitions +▁dock +▁ży +▁allocate +▁benefits +▁nieder +xpath +meck +älle +▁coupling +жил +ForKey +argent +clou +▁instruments +▁enthus +▁még +▁Пав +▁Rach +----- +▁APIs +▁Vier +Cmd +itore +▁Cuba +▁dátummal +▁embedding +stdio +▁Gilbert +▁geprüft +▁stating +▁triggers ++= +▁spécial +▁deliber +мин +Produ +▁Stati +▁zus +ktionen +Dispatcher +idal +▁LP +optera +▁estar +▁значи +смо +ouses +engono +▁WPF +publish +▁teor +elif +▁erg +▁separation +Pan +▁Orchestra +Peter +bounds +▁Shakespeare +▁cantante +▁demi +▁Popular +фр +arring +цин +▁Ис +von +▁substitution +▁línea +\}$. 
+como +▁важ +wagen +▁rarely +▁periods +glob +▁Frid +▁Terr +▁Release +Brainz +▁граф +DIS +compatible +▁poč +LIN +▁Källor +▁Arizona +ppy +Seq +▁Ain +▁Tourn +brow +▁Kör +▁ash +ogeneous +▁dialect +▁насеља +mysqli +цов +▁flor +▁фло +IAB +▁Within +^( +▁bois +▁tank +▁affili +▁hijo +▁Kate +▁Verl +▁Miami +▁typescript +њу +▁Vern +▁висо +iemann +▁coverage +brie +▁Starting +numpy +▁Jenkins +▁két +▁grup +▁Scient +▁interrupt +▁blob +ugel +▁Orth +abama +▁Bapt +ownik +▁быть +▁Julius +▁През +▁substitute +supported +chy +egyzetek +▁Performance +lessly +Constructor +▁extending +▁Muslim +Overflow +▁Jenn +▁produz +мії +▁países +▁eux +▁fate +ologe +ук +▁wobei +▁Sachsen +▁сайт +Models +▁Fast +besondere +▁FR +▁acon +▁Denkmal +▁anch +▁público +▁Tas +▁cand +▁paździer +▁Мон +▁versus +rut +GT +▁inserting +▁canad +єм +▁Metro +▁Herzog +Ignore +▁decrease +▁пун +▁Fischer +▁Mall +▁nörd +iostream +▁Luxemb +payload +▁Zeitung +▁modifying +▁Cher +▁Luci +nx +▁loose +▁topics +▁varied +▁pg +ajes +umm +Views +▁Beau +MAP +ipeline +▁Interest +arith +▁según +▁Gemeins +▁Attribute +community +▁центр +▁kilometer +▁économ +laration +▁къ +▁carriage +▁Lane +▁необ +kur +▁AF +INTER +))$ +▁beide +destination +▁fonts +appendChild +▁MAR +▁gay +mil +lesh +èt +▁Wang +▁Years +▁Symbol +Live +quency +▁Users +▁Unicode +▁Sau +▁tons +▁Ні +▁краї +AXI +▁Pick +AI +▁hath +▁ainda +▁papa +▁Censo +▁Bald +▁Насеље +▁simulations +▁jaren +▁inherited +▁той +▁feels +ression +▁október +bid +ási +▁muss +ventory +▁meist +▁bore +▁slider +дели +\; +▁extracted +кур +Edge +▁perf +▁Brigade +▁град +ienie +▁Norden +▁cancer +"/ +Cur +▁Сере +▁liquid +structure +▁choosing +▁Perl +Side +üs +ритор +▁kost +▁packets +▁которого +▁Comun +▁fingers +ográfica +>: +▁championnat +▁blieb +▁Situ +▁suic +andis +Fre +▁Conc +▁republic +▁armed +▁hell +▁hög +ragma +▁ense +▁acres +▁Від +▁Reform +MainActivity +keeper +erb +▁monaster +subsubsection +▁Див +▁creature +▁indicating +▁urls +▁kein +образ +pick +▁Admir +▁oldest +▁muz +▁contradiction +▁probabil +illiant +▁pav 
+▁papel +ubs +▁жена +AML +▁recip +▁COL +added +▁clue +▁Ukraine +▁jelent +чень +▁mathematics +Accept +▁сот +▁север +▁isolated +▁поя +wür +Router +CAT +rgb +▁Lov +mutable +▁Wes +▁Italien +Drag +enium +atting +tcp +▁erfolgte +▁Beit +гато +▁Systems +▁reserve +eree +▁Пари +▁зали +▁rent +▁sunt +▁Girls +▁Ernest +▁fits +▁oppon +▁живело +▁avaient +▁Florence +▁числе +▁engines +Dynamic +▁stycznia +▁bias +▁Exchange +дий +▁historiques +▁Hä +hod +▁wł +schap +▁lac +▁Foi +▁dwell +▁Unternehmen +URN +▁kilometres +▁Однако +кли +▁Sri +Groups +mind +oslov +fern +egu +abeled +Fiddle +▁Century +/- +▁Jegyzetek +Hen +ensemble +▁Gut +_{{\ +▁ranking ++$ +ала +▁#{ +imientos +achim +rides +▁Klaus +▁intend +▁Kentucky +cipe +▁Dienst +▁situated +▁póź +▁scrit +clip +нет +tables +▁Nied +▁McK +▁powst +▁kunnen +▁Evans +жды +вать +uchar +▁residents +iak +▁Resol +▁veces +▁satisfying +INF +▁син +▁crossing +iben +▁широ +pto +ILL +▁роль +▁aktiv +▁обращения +Wikispecies +▁Höhe +cro +════ +altra +▁FILE +▁ups +▁allocation +Michael +▁acknowled +Linux +▁metros +tte +afen +▁xcode +▁тради +species +▁injury +▁самы +▁lattice +Material +andenburg +▁huvudstaden +story +▁varying +▁követ +▁Российской +irse +▁drum +Pressed +Lar +▁Agu +▁weil +▁commence +▁Según +Gesture +Shape +▁Vors +▁succès +▁corrected +Kar +▁cruel +▁politico +▁Schriftsteller +▁risult +etu +archiv +▁género +▁Lü +▁triumph +ORS +Lu +▁personnel +▁Hills +asset +domin +Receive +▁Oak +▁Kno +▁Theory +irie +owan +▁estava +▁executes +йт +ópez +поло +ética +▁название +▁converges +▁notre +▁populated +▁movements +▁statistical +▁Zweiten +quin +▁importantes +▁klein +▁Segunda +schließend +Failure +nar +dag +▁ruolo +▁fiction +▁использу +▁crisis +▁Getting +,% +▁армии +▁campus +▁footer +▁días +бан +▁liberty +▁gh +▁chamber +▁districts +▁excited +▁canción +tero +▁Working +▁części +льный +▁forum +▁Ehe +▁ката +itations +Tools +achiv +▁cres +asto +▁rever +▁nazionale +▁doors +▁Nancy +▁islands +Imp +▁Chair +▁vorm +sein +▁доку +erset +▁tätig +▁Krit +▁пя +▁conservation +▁Partido 
+minipage +Validator +▁recovery +▁NASA +▁breast +ilty +analy +elines +▁Saturday +emark +cej +Zero +▁Turner +secure +Exists +▁Rick +evalu +ctrl +▁compression +▁CURL +textcolor +)\, +longrightarrow +▁Fernseh +icha +▁loi +▁Оте +▁cave +▁dozen +▁explaining +▁innov +▁Nicholas +▁diameter +▁Marian +▁fires +▁artifact +▁Parker +▁Bund +▁verte +▁talent +▁Lucas +reverse +▁folgenden +▁Sah +jections +▁invece +▁costitu +▁ssl +}}^ +▁violent +▁spos +Rout +jdk +▁заме +▁furent +andal +Hom +▁Senior +▁pounds +▁Discogs +▁зе +'}[ +▁Napoleon +ordinates +àn +▁kurz +▁vere +▁reuse +▁Ген +▁Syst +▁disappeared +▁Watch +bibliothek +▁корпу +▁Cs +▁}` +▁rör +▁дела +VB +▁calculus +рода +▁judgment +atile +▁longue +▁Hus +Jac +}}) +RIPT +IABot +▁após +▁aston +Webachiv +▁URLs +▁coat +▁эконо +▁lear +extensions +▁Classic +TI +▁Tage +▁lá +▁semb +▁développement +ISTS +▁solves +,\, +▁чемпі +ordinary +▁Bav +▁muchos +Self +▁Май +▁Diet +▁necessity +від +▁mano +▁Ср +▁carre +▁Camera +▁Narod +▁Phone +▁polym +imore +isEmpty +▁Houston +▁Rece +▁presentation +ниципа +▁Db +▁confident +▁}{ +▁bullet +▁{}, +ANGE +▁Notre +chin +▁Dragon +erca +iali +▁asset +▁muito +▁deeply +▁restriction +▁commerce +▁Bomb +caught +qq +▁Arag +▁немец +▁Analysis +▁článku +▁baby +▁echter +▁одного +жена +▁whitespace +çu +LIST +frique +▁varias +▁Wit +▁Licencia +Exit +▁sierp +▁assemb +▁splitting +▁palace +▁blocked +▁boundaries +▁iterations +▁Rotten +▁Verkehr +▁weer +Tests +ifting +▁regul +▁persist +▁Solution +pb +▁collapse +▁arrested +▁predicate +▁Zone +▁ingen +zález +▁banks +plant +▁Nella +▁бан +▁Snow +▁Kreuz +ício +▁enters +▁expose +či +шие +Qual +▁landscape +▁подацима +mai +stag +ований +DEF +[]{ +▁dernière +icut +▁Xml +▁subgroup +▁Polsce +▁Warning +▁vehicles +iot +▁dll +ront +▁Louise +▁ara +▁Scala +▁canonical +▁placing +ERY +▁Jag +▁virus +emu +▁}); +▁мм +▁Trying +▁Lexikon +abord +▁expedition +▁demanded +Zyg +lein +▁verwendet +рина +wol +▁pivot +▁однако +▁propriet +▁awards +tout +▁assim +▁Storm +Limit +elin +wealth +uez +▁rappresent +▁resta 
+▁gegründet +▁journalist +isie +▁facility +illed +ulk +▁PK +Anchor +▁_) +VF +LAB +▁nå +odos +▁billion +virti +▁Jeux +юза +tomcat +▁charts +▁Bundle +▁lst +▁exer +▁females +▁obliged +▁aby +rolled +dri +▁Sche +▁vessels +IMARY +▁reasoning +▁проте +FILES +verk +osos +▁комму +дії +▁dd +▁соответ +▁IOException +ských +▁CLI +▁ње +CM +TD +▁possibilities +▁Compos +half +▁webpage +▁swing +▁zas +▁cycl +leid +istica +▁Insert +▁Sweden +▁wanting +▁ال +▁eeuw +▁Administr +▁Warren +▁bs +▁pam +anus +Dra +expl +▁Kant +▁Austin +▁csak +▁theatre +▁compatibility +матиче +setState +бю +}{| +▁Dy +▁Zwischen +Alt +CLARE +steps +▁Lage +▁Mitt +▁Dublin +▁работы +deep +▁flows +▁Palace +unix +refs +umar +aset +cov +▁ping +▁Safari +flug +creens +{# +▁реа +adors +▁amor +uce +demic +▁Netherlands +▁clusters +▁enfor +marine +▁bugs +izzata +▁scra +Les +quick +▁turno +_* +ера +Generated +>[ +▁estre +orde +▁verg +роз +▁pau +includes +assa +aders +▁Герма +▁estaven +▁earliest +▁resultado +mun +▁plots +din +sorted +▁preference +rió +туре +▁Ligue +▁завер +phr +▁pocket +▁parl +▁lak +▁powie +▁altres +$}; +plain +▁Cred +itza +perp +Green +▁devoted +production +worker +elsen +▁vern +▁március +▁Confeder +▁Liverpool +▁музи +▁emails +▁distances +▁segments +▁anth +▁wrest +▁hoog +▁cinema +rror +▁geboren +▁éc +Marker +▁Compet +▁листо +allowed +volume +Espagne +Ze +▁fixes +▁rond +▁arrangement +/~ +.]( +▁Források +▁weiteren +excel +▁змі +▁moderne +English +▁Transfermarkt +▁bearing +▁cleared +▁сам +▁divs +ći +▁этой +▁Геор +scene +▁ages +GEN +rän +▁Toul +▁Abs +ját +▁mediante +▁empres +▁Employee +▁polynomials +▁optimize +▁выступа +fare +вей +xf +quez +▁botan +▁defend +▁Quart +Mont +vb +tick +WD +mine +▁modific +notification +▁denn +▁algo +▁Spo +▁mistrzost +/: +▁apresent +▁прод +Volume +ską +protected +▁Turkish +azy +▁pouv +▁período +skog +▁entropy +zed +тори +▁lij +boards +▁стату +Bool +▁polity +@", +▁рік +née +▁Zug +▁Uniti +émet +atience +dimen +▁Steven +Ha +ACTION +▁wand +▁Navar +▁січня +Watch +▁Stuart +▁zde +▁контро 
+dataset +yó +▁Bush +▁себя +▁worthy +▁Ble +▁propor +▁Village +▁ry +▁voit +▁копия +▁zp +▁cura +▁Html +▁Dieser +▁Days +onnes +▁antigu +▁Staaten +▁faint +ongs +▁öst +Redirect +ель +atorial +▁bother +EditText +▁Giul +▁заво +▁pueblo +▁Mississippi +jak +▁wings +onc +ível +iencia +entlicht +▁BTW +ornal +▁Коро +▁одним +▁salv +▁finden +geo +▁авиа +attung +viv +▁Luther +▁общи +▁Rolle +▁Abraham +▁centered +▁slash +isat +emann +Os +парта +▁Pablo +▁collaboration +paths +édition +▁viewed +▁consisted +▁recovered +▁Mexican +▁Fix +▁spell +Special +▁Ст +esseur +▁Украины +former +▁św +▁zeros +▁Straßen +▁organisation +üssen +▁Sierra +▁Season +▁volont +BeanFactory +▁помощ +▁pressing +▁equivalence +▁catt +icity +▁accomplished +▁yo +▁sic +▁imports +▁accommod +▁Porto +▁яка +▁loan +тики +▁checkout +▁assess +▁Population +urent +clojure +▁Santos +▁információ +POS +▁gare +▁kick +▁radical +▁Peace +▁streaming +camp +ząt +говор +▁Regierung +▁proceeded +fm +лены +▁earnest +▁Parad +requests +▁Raum +šč +▁policies +▁Tig +▁sitt +▁Energy +▁purely +▁Haut +▁Speed +bio +▁orange +▁biggest +▁britannique +▁Notable +vu +лении +бин +▁Nash +щение +▁ciel +adémie +▁грудня +▁joue +▁voted +rico +▁гор +▁команду +itivity +▁ще +▁definite +uropa +!"); +Defaults +▁некоторы +édération +▁silly +▁talked +reu +▁Lomb +▁statue +кта +юр +umably +▁городе +▁Runtime +▁diagn +▁retro +▁Sverige +▁inicial +ienza +▁figlio +▁zog +▁rey +▁Rund +тный +▁ceased +erno +▁esa +▁trouv +▁Gemeinden +▁comercial +skap +enario +▁juris +TB +нала +▁vij +VO +▁clin +jör +сан +owała +ribución +▁ursprüng +▁condem +▁Stage +▁mixing +▁різ +▁fans +ház +social +zan +▁свой +Cookie +▁Roland +azionale +▁Sloven +▁Fiche +▁Sé +hä +▁officials +▁înt +Interceptor +Tables +▁davon +initialize +]=" +▁Body +▁Upper +▁Collect +▁Zürich +Horizontal +Typ +▁político +▁RewriteCond +▁hoped +▁anxious +Liter +jahr +▁assemble +▁crypt +lahoma +ASH +▁Бри +▁Cic +twitter +hyper +▁Tell +ільки +вобо +▁bazie +▁contemporary +▁Parameter +stwa +▁bekend +cock +previous +enska +▁caller +]]) 
+▁Raz +▁Selon +▁proposal +▁bý +▁Sied +▁Arbeits +▁pride +▁slope +idé +gradient +▁Джерела +▁SH +▁разрабо +iversity +сподар +\{\ +▁стали +▁Einzel +▁rgba +▁Anim +▁alles +бар +erte +▁réalisé +Institut +▁markup +▁vars +▁gam +▁Василь +izza +▁Cob +▁Metal +▁leak +▁Lanc +Switch +Delay +atuur +▁четы +▁англий +▁legacy +▁desarroll +▁topological +▁jeweils +▁Nederlandse +▁atmosphere +urban +▁slov +▁lawyer +pecially +▁alternate +▁paramet +▁establishment +▁woods +PD +▁наи +▁mang +▁wechselte +ську +.= +▁fifteen +SUM +▁Fro +▁LED +owano +ствие +▁Données +tol +żyn +cref +ствии +horn +▁сооб +▁оборо +▁Complete +“) +▁kindly +▁Chamber +ség +WH +▁ambient +кро +▁cheval +▁написа +flu +▁Offiz +mate +natural +separ +empre +ViewHolder +fw +▁letech +▁trailing +atri +▁Gó +▁Bonn +▁unlikely +RAM +enst +Stats +▁политиче +)--( +▁trom +!... +▁Meanwhile +стана +▁Reino +▁Arist +$}}% +▁solem +closure +ignation +łod +▁divor +▁международ +=" +Orientation +cid +Cart +▁murm +▁assez +▁linking +building +▁reconna +▁shook +managed +landa +▁León +▁création +дой +ocity +▁wij +▁wieś +xtart +▁Move +lungen +ствует +orney +optional +macro +Condition +▁squares +▁mistaken +ánt +▁Ris +▁sentences +erea +▁mij +Und +▁nombr +zA +▁Independent +▁preview +imas +▁males +inental +Thank +▁popol +▁pover +▁grasp +▁imped +▁campionato +▁Wei +▁titled +▁Además +▁Password +▁Pam +UILD +▁липня +werb +................ 
+▁Río +▁teeth +bp +▁SW +ulaire +▁seized +▁Stef +úl +▁viz +iony +▁junt +▁která +▁września +<> +▁surg +▁tutte +▁Hob +повід +▁wohl +▁trag +▁Crown +▁trova +стову +▁Vienna +esehen +▁metropol +▁reflected +тета +▁traduc +▁Bast +▁erschien +woord +()" +talet +▁roads +ведения +ührung +▁cogn +▁Valle +▁landing +▁Regex +▁Iowa +dział +▁erreichte +aum +▁founder +apolis +Compiler +▁kop +▁marc +▁територ +))` +▁lei +geon +▁weapons +▁horn +▁elif +▁Capital +će +▁forall +▁эта +preview +▁DNA +▁sid +orch +▁Ras +▁arab +Best +▁счита +▁López +ança +▁funkc +▁tienen +;& +museum +▁Err +▁resort +Nov +▁kal +MW +шь +anchor +▁роман +leading +▁manten +▁Silva +dade +▁designated +▁revista +Oct +percent +▁уні +identifier +mass +@@ +ulsion +germeister +▁predicted +▁сви +жной +▁Ergeb +▁cust +▁removes +charg +пример +▁forming +asma +stdout +Fun +yme +tered +ursive +ighed +▁след +verband +▁LOG +rams +éon +endra +▁Bereich +▁temporal +▁langue +▁Inn +▁moreover +▁tutorials +Middle +▁советский +▁maintenance +asures +▁válto +BASE +▁disappear +ския +▁conocido +▁Нау +▁Libert +▁Harold +▁lifetime +▁Tür +▁zawod +omic +▁Retrieved +architecture +čka +iformes +development +ordnung +Inf +leben +▁Stars +signal +▁grammar +▁corso +▁Wagner +▁geht +▁royale +warn +umbled +▁instit +▁Ши +hh +▁refuge +▁favorite +ierto +▁condado +▁Ther +▁человека +▁Food +▁seizo +▁Initialize +▁connu +▁overlap +▁Emil +▁Martí +▁жовтня +erva +▁boats +ações +▁derrot +▁malloc +▁conject +jk +▁sare +лемен +▁sums +Authorization +▁Kun +]$, +gemeinde +odot +defin +▁emission +▁Крас +▁appart +▁stopping +▁Сред +▁conjug +▁insight +▁Broadcast +▁PMID +▁advantages +enes +▁residence +ljen +isseur +▁pubblicato +▁GitHub +▁Peru +▁galaxies +▁annotations +gas +▁répond +Js +▁independently +NP +▁inqu +▁grounds +Components +▁anten +▁вз +▁hos +▁sint +▁hiding +▁województ +Messages +▁показа +=== +▁Abstract +▁läng +▁Formula +dawn +▁designs +Img +▁Portuguese +▁incluy +avigator +▁Brothers +▁continent +▁evidently +race +цького +▁reck +▁серпня +▁Grey +▁appeal +▁unlike +▁powershell 
+▁racc +fers +▁burning +fasst +installed +▁Give +▁colonial +▁€ +▁Rö +▁christ +nehm +там +▁corpo +▁convirti +yter +Sym +▁Greece +▁moth +▁Johan +▁monarch +▁Download +▁craft +už +▁Luke +▁suffix +\/ +Have +▁карь +▁comfortable +▁tips +▁Після +▁броја +▁информа +MQ +бран +▁tx +▁slaves +▁firewall +▁Forces +atif +▁Quellen +▁théâtre +льных +▁расположен +▁Details +ką +▁longitud +INST +▁naval +Fernseh +essel +Grad +▁belang +▁aggi +ZygoteInit +łów +▁Sug +sil +▁exterior +щі +ORD +enser +▁rapide +▁темпера +incie +Si +avam +arded +▁Added +Endpoint +hardt +стран +▁estilo +▁Haz +▁musste +uo +iii +▁ří +anzen +жений +aha +ARNING +▁renov +▁divine +▁convinced +▁humans +▁departure +▁Mediter +qa +▁possessed +▁церкви +giv +▁свої +▁Ortste +Rich +puis +increment +▁Hannover +▁ucz +Done +▁alguns +FIX +▁Heritage +removeClass +фер +▁abc +Dr +▁семей +{: +▁seule +zeichnungen +addy +▁París +üsseld +▁reception +folio +tiny +▁recensement +▁Nur +▁kier +▁gmina +staat +ándose +ческая +▁speaker +▁exponential +▁Dieu +▁приз +▁Rafael +▁ggplot +▁Template +oure +▁Inner +ogne +igare +▁Arte +▁Cov +▁aufgrund +▁Бы +▁ceremony +▁Spart +jective +yi +▁inizi +▁latin +▁Nevertheless +▁Done +тря +▁Arr +season +▁складу +▁podczas +▁Beautiful +▁Weltkrieg +▁зо +▁overcome +▁Praha +▁району +▁subscription +igent +▁пока +latex +▁beach +▁роках +geg +▁probl +arguments +▁organizations +▁Nan +▁stones +▁Hunter +▁regularly +шого +▁flexible +opts +ář +witz +▁') +PASS +▁kraj +▁fake +heits +osph +parseInt +FALSE +▁profess +people +▁precip +dirname +▁perpet +▁Updated +rayed +▁provoc +▁травня +▁categorie +▁тео +сну +otr +▁Верхов +▁compét +Cost +▁wider +▁Obviously +писан +▁настоя +▁seeking +()), +▁équipe +▁commits +▁Svens +ябре +atern +▁heter +▁Bootstrap +éné +▁derivatives +▁Detroit +▁provincial +onomie +EB +▁cuer +▁относи +▁ней +)». 
+▁Ciudad +IAL +zyst +)") +▁Alc +blogs +▁parmi +▁Albums +▁Boliv +▁clés +Products +uerdo +▁gelang +znik +hagen +anonymous +▁svg +▁Conseil +▁Ari +coli +▁czy +▁CV +▁ford +▁Außer +▁CI +▁tempt +▁Organisation +áš +▁cycles +▁geslacht +▁людей +ými +▁Spieler +efe +▁Marvel +▁portal +▁Серг +▁grado +▁handlers +▁Interface +AME +▁seriously +▁Binding +▁Rang +▁nada +oce +▁integra +ocracy +▁альбо +▁stability +Uns +▁veter +------+ +▁serait +▁omitted +▁uncertainty +onian +▁resto +▁желез +▁одной +▁Bevölkerung +▁Kraft +стр +▁Moscow +lane +arab +▁spole +▁своего +?: +START +▁интер +▁sympt +▁Lorenzo +▁ejec +▁prosper +DAT +лимпий +▁shapes +valueOf +▁associate +▁Medien +ENV +▁сре +▁државе +▁theories +heb +▁Wayne +▁StringBuilder +iwers +▁Maps +Phys +\}\ +▁Parte +▁Hudson +лон +Lng +▁ры +стей +lau +ancer +▁Coppa +▁війсь +▁ucc +▁Pattern +▁garbage +▁González +▁Encyclop +etten +External +REF +>; +lijke +▁intersect +▁Unless +▁deeper +▁жі +dent +lef +▁chanson +▁diffus +▁primi +▁Wieder +▁aws +owana +▁sociale +ikk +льной +▁divisions +лосо +▁Claud +▁Ya +▁voce +▁Branch +▁fitted +orr +ôtel +stroke +listener +iman +восто +▁Shah +Introduction +▁newline +▁tile +'])) +▁travaux +CONFIG +▁quadratic +onneur +▁Giorg +▁identific +éricaine +▁UIView +▁Liberal +▁Koch +▁Berliner +▁notifications +▁Susan +▁cadre +▁Kloster +▁examine +▁един +▁UNION +▁alten +▁finit +▁pedig +cyk +▁mouvement +IOS +▁британ +▁bout +▁автор +ництво +ето +lera +cls +▁Ley +amy +agens +ashed +▁okrę +гро +ellett +▁Fellow +▁manifold +$), +lder +▁voz +▁begg +▁baron +▁fid +▁firing +ilda +dek +AU +itare +▁Ara +▁Exit +▁cinemat +▁intros +▁contacts +пени +▁möglich +▁Singapore +ström +▁Hern +▁sixth +▁publications +vie +▁Hat +▁accepting +ác +stwo +▁quietly +Photo +▁basket +▁eigenvalues +▁médec +▁Olimp +▁церков +alin +consum +▁lassen +▁анти +▁Seq +"; +rare +▁$|\ +▁nick +dflare +Vec +bindung +▁bg +changes +Days +▁Mouse +▁waited +▁Tomatoes +▁fas +verte +▁succession +сор +▁sols +▁Render +▁leadership +▁significance +▁gauche +cano +▁Pie +ensoort +▁cambio +▁уз 
+▁endeav +Completed +▁Архивная +jd +órico +▁churches +▁animate +SG +compute +▁uniformly +INIT +lles +HttpRequest +Ко +Diff +▁sah +airo +maybe +UTE +▁Dow +human +▁aurait +dark +▁repair +▁ner +▁Dabei +▁Botan +Original +ază +▁NAT +imper +▁Youth +thes +▁округа +▁Flo +▁breakfast +urls +▁übernahm +ários +▁Orange +▁Affairs +ske +▁notify +imoine +▁Arena +▁liberal +▁obec +ifa +guez +iono +ператор +▁retained +failed +bine +тных +▁CGRect +camera +idenote +KB +▁lights +▁Pictures +▁Squadron +▁Volk +▁burg +,] +Gi +êque +makeText +▁everybody +▁Hyper +▁Deux +▁glory +presentation +onica +▁frère +aget +▁hints +▁tunnel +▁Ej +ális +▁Viv +ственных +▁caps +PART +oci +▁prices +currency +▁achter +romagnet +gender +▁suis +versions +▁Training +inside +ege +▁totale +▁Daar +▁grudnia +▁Ier +▁occasions +▁kde +▁tensorflow +▁ór +Methods +▁looping +▁directeur +kę +▁isomorphism +▁João +▁aligned +онов +urger +▁nova +morrow +altern +HD +▁marqu +ativas +ggreg +▁ancien +nit +▁secured +mier +▁Ole +▁инте +▁minus +▁clearer +▁nello +▁információk +▁propre +{. +ilog +▁Quick +▁accus +employee +▁зу +цький +фіцій +▁публи +▁bent +▁позво +▁Пор +ází +ánico +emptyset +▁surtout +reno +unya +▁уез +▁Millionen +▁listopada +▁Maine +▁grupos +▁Storage +▁apple +▁Lö +oused +дро +sci +▁hibernate +dog +▁восто +▁intensity +legend +▁Wille +▁szerint +gesellschaft +▁Living +allo +▁Split +dru +need +▁Джон +▁Swiss +▁spraw +▁beho +▁fotograf +▁rencontre +▁kis +▁signing +akult +▁indexing +apor +▁conception +aggreg +▁Савез +▁affair +ění +August +▁секре +▁mieszkań +UIImage +▁bishop +▁servants +▁trail +digit +▁joins +▁Near +öffentlich +>{ +▁skład +geführt +▁Holz +▁Militär +achi +Upper +pine +utzt +▁nuova +ibration +▁Bien +▁первый +▁Creating +Once +▁einmal +▁geometric +stvo +▁kW +▁decomposition +▁comedy +▁activation +▁angry +illeurs +▁instantly +▁suggesting +▁Clay +cot +▁Gén +($( +unwrap +▁lifted +▁Kit +▁linea +ок +hart +->_ +▁nuit +▁Issue +лии +▁röm +Tasks +▁Sr +▁seis +asia +}}$. 
+:{ +controls +▁Stim +▁Recht +ociación +▁Natal +▁Philippines +ulen +Fixed +▁switched +Zip +ospel +▁начале +▁Blan +urst +▁autour +Ca +▁latitude +▁Frei +▁Musée +▁Kurz +▁região +swap +▁hate +▁modifications +▁Ком +▁Antoine +uga +RECT +éter +GROUP +▁sacrific +▁Whe +▁Stevens +ologische +Summary +obs +hnen +<%= +dienst +remark +▁veröffentlicht +ел +▁Mock +▁Льв +▁três +gb +▁celebrated +▁Eb +▁costa +▁Geographic +▁attachment +mannschaft +▁dependence +�� +▁attitude +etal +vic +baut +▁дов +▁interven +▁Gü +ónica +▁Pon +▁disponible +▁Feb +▁worship +▁Specifically +Hy +iju +▁cb +▁spac +leveland +▁localidad +▁preceding +▁Hessen +xp +▁Wein +▁Româ +▁giorno +▁квітня +llaços +▁Academia +▁kül +▁Års +▁нај +uclide +Internet +orton +▁corn +ями +▁"* +▁Felix +apat +▁свои +MIT +made +▁locomot +хода +FP +▁pm +.*; +▁Hamm +`} +LayoutInflater +==" +▁Eur +▁dogs +жении +▁azon +▁emulator +▁ricon +beeld +▁ну +▁approximate +LM +▁Bond +▁enh +ędz +▁solit +RelativeLayout +eteor +amentos +▁indirect +iből +▁gros +▁Originals +commands +Export +▁Avec +▁solemn +▁correction +▁проводи +▁Mosk +▁подо +▁gebied +▁następ +▁Driver +▁Ook +▁Vec +▁lungo +ficos +▁svol +▁kid +nja +▁Hr +▁поддер +▁visibility +▁Méd +▁cpu +discussion +Asset +▁defense +▁Anyone +▁Justin +iszt +▁Collins +▁Valent +▁Pale +▁fuel +▁nose +ríguez +▁Schles +▁Malays +▁commut +dro +uing +▁Rico +▁Emma +orp +▁Kirk +▁Quando +▁Neue +▁demande +▁Cover +▁rescue +▁gewählt +▁Calendar +▁Madonna +WP +oshi +▁Maven +▁belle +▁wx +▁sugar +▁Betrieb +▁equilibrium +EAR +▁texts +слов +▁czerwca +▁Düsseld +▁ELSE +▁amery +▁ani +▁obey +▁Nell +▁inne +▁тро +FD +cco +▁Zob +alette +▁május +ected +▁Turkey +▁Whether +qi +▁што +▁headquarters +endi +arus +opus +▁золо +▁destru +▁Lok +▁satisfaction +() +▁Тер +Jose +▁conquer +▁Effect +LayoutParams +iez +▁externs +▁gegenüber +▁ESP +olta +processor +▁Kult +▁Atlanta +▁tier +Operator +▁диа +▁пись +▁groß +▁hearts +▁millimeter +although +alles +▁Magic +training +oline +▁органі +>\<^ +ціаль +exports +Workbook +▁вересня +▁teles +▁economy +▁trap 
+▁refuse +▁stranger +▁instinct +пода +olan +▁ning +inflate +itatea +acks +▁Joy +FLAG +ailand +▁sorti +▁впер +▁pén +Nothing +▁száz +▁Áng +▁AUT +Actions +Every +▁червня +▁автомо +▁routine +▁estruct +▁Gang +▁holes +thesis +▁concl +▁pé +riers +ровой +adic +Speed +▁commanded +▁Nazionale +Managed +▁DECLARE +▁sedan +Strings +▁sacred +tersuch +▁abitanti +brit +▁NCAA +▁СП +▁aged +▁Chiesa +▁revision +opro +▁overwrite +embros +▁sortie +▁otten +xiv +▁deli +▁Asp +▁balls +kaf +▁brave +▁всего +egn +jpeg +▁Osten +Constants +▁Infantry +▁Nev +▁яких +▁муниципа +cija +▁poem +▁negro +хар +▁Ask +▁avo +▁Meyer +▁Westen +▁oko +agin +▁Süden +entries +▁Republik +CollectionView +------- +▁firefox +▁alcune +▁фото +▁отрима +~~~~~~~~ +▁Раз +▁Complex +▁pia +▁publicada +wei +cedure +occupation +▁medicine +▁drove +Problem +▁beginner +▁thoroughly +uria +avant +ucha +▁lever +▁teatro +AVA +squ +trat +ivatal +▁dirty +▁seconde +▁gravit +▁proposition +hbar +omini +▁” +▁Camil +▁queen +modifier +Jan +▁lyr +ComboBox +ionic +▁holy +▁Sebastian +|_{ +▁{@ +▁можно +▁Creative +▁interess +▁CT +ições +▁chant +▁współ +▁Мексика +▁ranked +▁października +▁brut +▁farther +▁Verb +▁Seven +lbl +▁mentions +▁Fight +ifen +▁bog +▁regres +▁scoring +icane +▁Elli +▁pierw +measure +ńskiej +#{ +▁деся +▁varmaste +▁Unix +IZ +itié +Primary +▁Springer +üng +▁anv +▁versione +▁shoulders +▁брига +▁jav +ltal +▁kallaste +▁Mitchell +▁wireless +▁Ál +respons +could +▁relax +Lond +ńcz +ствовал +▁polski +enç +zar +▁dtype +owned +unknown +▁mutable +▁siempre +▁Montreal +▁locate +▁traces +▁insgesamt +▁Nil +▁прода +▁Warner +▁Nau +triangle +▁concentration +▁gentlemen +ächt +filters +incipal +VALID +▁депута +adó +▁konst +gså +agas +▁meilleur +▁данным +єдна +encoded +<' +▁sheets +cuador +▁використову +▁Deput +▁manière +ąg +csol +)$- +UIView +▁millones +▁Ehren +Sil +▁atac +▁Cold +"\ +▁approached +▁Årsmed +WM +▁Deport +mis +andbox +observ +setting +ható +▁strat +▁spre +▁personne +▁dirige +pull +dating +▁Fact +▁manipulate +▁MAC +▁dej +ultimo +FX +Life 
+▁crack +▁mí +▁пове +▁wore +université +▁formulas +▁Elisabeth +plots +mile +▁menor +тил +keyword +▁Baltimore +hrer +▁Clement +vim +rass +Take +▁című +▁Convention +atge +seed +▁Dí +▁Spider +ahoo +▁имеет +ührt +▁пописа +▁Cot +▁nobles +RESS +▁chemin +▁główn +GG +▁Germania +▁Alexandre +hens +swift +oop +Subview +▁requiring +ędzy +▁fict +▁Констан +▁déput +▁surprising +▁deix +▁unterschied +inson +▁Character +▁gestion +chus +comes +▁neur +▁yeux +ollar +▁parad +▁maggiore +TRAN +▁votre +▁descent +▁Icon +▁Judge +▁occupation +eping +▁tongue +▁Enllaços +ruf +▁protein +▁visitors +axy +esten +blica +hw +▁spirits +▁reduces +▁мен +▁Lamb +▁Mine +▁verified +▁Baby +▁prize +вър +▁ratings +▁fore +asha +urrence +▁intér +▁Olímp +cra +▁computational +irche +.:  +▁illustrated +▁Share +▁households +▁convolution +oemd +▁zdoby +ccc +▁quantities +Che +Should +▁genius +adj +хва +Петер +EMA +▁Rights +▁Eli +VAR +шло +▁збір +iftung +▁contributed +zef +▁CHAR +▁Sib +▁Mant +▁связи +▁javafx +▁cependant +▁intu +▁твор +▁Ó +guer +rado +▁Revol +▁fémin +▁Orleans +▁poj +▁prez +Tex +ouwd +?( +▁LIM +istique +esar +▁heures +icki +▁dbo +skih +confirm +▁világ +▁ciutat +▁DR +▁Hawai +ched +▁spher +▁Artikel +▁Multiple +ciu +▁мы +▁lipca +](/ +Strategy +▁Alabama +SDK +UTC +__. 
+Arguments +▁setContentView +île +ByVal +▁JVM +ющего +▁Leonard +▁justify +цем +▁nab +CCESS +▁hopes +)& +sero +▁зай +слід +▁Rég +▁Sang +▁fung +baar +▁coffee +assembly +▁Він +эй +▁comprend +filled +рд +odia +▁gens +fluss +Drawable +▁surve +Setup +▁należ +▁conjunto +▁Его +▁oldal +▁verbose +▁Electric +▁Harrison +engen +paragraph +▁nouvelles +▁време +▁memor +▁mayoría +сад +▁bataille +▁thermal +▁Хронологи +▁Better +bye +▁театра +roe +▁segle +rott +▁opinions +)}) +ühle +▁Gün +▁Щ +ból +▁Larry +▁solic +▁zwar +▁Caroline +▁Reichs +Extensions +migr +:@ +▁enumerate +▁eigenen +▁explore +ému +▁gat +▁imperial +▁Usually +▁tud +▁укра +him +▁corners +▁SER +▁interpreter +▁Ice +▁amounts +▁Pala +▁tinha +vole +▁gle +ucci +▁siehe +Jack +▁woll +▁elder +▁кораб +▁engag +▁Laurent +▁achiev +istik +arct +тного +▁gir +▁Singh +mathop +USA +▁Projekt +▁debe +richtung +▁Tsch +uminate +▁szó +lyph +зидент +▁limitations +ющей +▁bila +Push +▁offering +iennes +Fri +▁postgresql +▁Tommy +▁particolare +▁století +▁arrib +▁Eva +school +▁vendor +▁Dallas +▁prolong +CREATE +▁suivante +STATUS +là +kv +▁häufig +▁Agricult +▁huit +▁inoltre +▁Lloyd +▁француз +▁выпол +▁faithful +▁Вар +▁verl +▁juego +▁Резултати +,..., +▁implicitly +irks +Calcul +▁meses +omed +▁pak +herit +▁optical +▁Історія +veis +▁capitale +placeholder +intrag +▁Atlas +)]; +icons +▁Bent +▁Widget +▁volunt +avo +égr +lige +▁NAME +▁abstra +▁fís +▁Browser +▁bush +hall +▁clouds +▁SUB +▁tandis +▁Commonwealth +тая +▁exhaust +________________ +▁Statistics +▁Religion +▁Muham +uals +goto +Digital +Family +▁Bun +letin +Management +▁capabilities +annten +▁себе +▁stays +kter +▁dost +▁Тре +лович +▁dying +sections +ános +▁apparten +▁zoals +▁dressed +▁compress +ńska +▁sierpnia +▁титу +dictionary +▁rabb +▁vérit +Во +▁singleton +▁vital +Refresh +мель +▁Zh +▁Afghan +inkel +aaaa +▁participants +arin +▁Mold +▁primeros +▁ран +▁Амери +▁restaurant +ével +▁SL +▁Rey +chas +▁electrons +▁Pitts +▁Jules +май +enant +-} +лад +▁Москва +gom +▁Fernández +fund +interno +▁Mari +▁rius 
+▁Prozent +стрі +▁внут +anterie +▁прис +▁обы +▁Marina +▁occurrence +rikt +▁физи +▁schwer +▁Гре +Reset +▁mucho +andr +▁Wies +▁Keith +▁Julian +▁cole +ciendo +▁Contempor +etry +elian +гии +▁голо +▁dél +▁decent +РСР +▁szeptember +мест +castle +▁держав +}") +▁ASCII +▁Glen +itzerland +Toggle +▁tradicional +▁Plat +vee +abgerufen +(| +CLI +}}$, +▁Bowl +▁Male +▁Bres +▁пси +▁Challenge +zó +▁projekt +▁negoti +above +▁перио +▁longest +authentic +▁tradu +▁mujeres +▁Andre +▁hadn +▁Schule +odel +bled +▁Trade +▁mobil +▁algunas +▁Lak +▁Connecticut +▁alco +▁Selbst +ił +▁alb +ouverneur +▁sr +▁vba +loped +▁Partei +uate +▁Authentication +bei +}}. +▁konnten +▁допо +▁hyd +Office +données +▁Cleveland +rita +íos +▁выше +▁Roberts +▁élections +▁'') +▁publishing +▁bapt +<>(); +missing +ровано +▁housing +▁inference +▁Renaissance +▁règ +▁Steph +CES +ERE +кет +OU +▁grouping +verkehr +jih +agli +▁milk +lait +Stage +▁byly +▁wooden +keley +etra +▁Peg +▁donné +adal +sequently +▁insbesondere +ELD +▁Mam +▁volte +▁prospect +нове +▁denoted +▁overlay +Permission +een +▁EM +▁uz +Mc +olit +▁servi +▁Heidel +▁Wiener +▁illegal +▁predictions +▁goog +hon +▁Cinema +▁револю +▁Rule +wod +▁radiation +oł +ової +▁Perform +▁prisoner +▁amet +▁figura +▁Commander +▁официаль +▁trov +▁acted +▁workflow +▁Республики +▁guidance +▁мене +National +▁Kel +webpack +простра +▁llamado +alog +terra +ixen +legraph +äischen +▁teachers +uden +▁også +possible +▁Soul +▁Geography +▁зада +hit +▁anger +▁remporte +Pod +чке +▁aria +▁Astronom +chapter +▁fork +▁Cuando +mense +▁Christians +gc +▁#( +Organ +▁steady +pse +жить +ignes +aterra +movie +posta +raste +▁Ressource +▁País +▁(); +▁penalty +тт +▁trasfer +century +▁cleaner +selenium +ortheast +xic +лії +▁inglese +▁Tang +▁gods +frent +ciente +starts +▁musica +ymnasium +----+ +▁terrest +▁retrieved +iare +unning +▁Marcus +▁promote +warning +тый +})$, +Transport +▁reson +▁Clo +▁erm +▁eliminate +heimer +▁saves +▁prayer +Classes +Express +▁Akademie +Else +Turn +▁ikke +▁rei +▁dirett +▁Rost +▁Papa 
+▁jsf +лением +▁Tul +▁Zak +▁niemieck +Tw +amour +nested +ppets +шп +dit +зен +zyma +hrte +Constraints +▁ownership +Arm +▁consumption +▁fet +ivari +chrom +setAttribute +▁compose +▁backing +▁Paz +▁scri +▁Mechan +▁Norway +▁Jup +▁mér +▁administrator +▁cabe +ivalent +▁throne +▁dues +▁humor +▁Adri +▁abort +ñas +▁Київ +jící +▁zweite +▁doub +ershell +шой +▁Fam +åk +▁tweede +▁Rib +▁før +pción +inned +rvm +▁Appar +▁Dj +▁Shang +Distance +▁dawn +▁Matth +▁errichtet +phantom +▁releases +Recognizer +▁Kop +▁Pul +ué +nats +relax +▁fled +▁experiences +щее +меня +▁персона +▁Identity +rets +kunft +larg +ListItem +vd +runner +lant +ipart +bay +iei +▁lengths +▁cattle +jets +▁sehen +Jul +fatt +▁surrender +▁Trump +дного +▁Fourier +ieben +_" +▁früher +▁garant +uclidean +ägt +▁півден +Pages +▁rivers +▁donner +svn +▁ł +ově +▁Leist +arial +ových +▁filling +▁musicale +maxim +▁dashed +▁Нов +Drawer +▁Medicine +▁dokument +owel +vić +hely +▁elet +Seconds +▁Gonz +rou +▁finales +rn +fø +▁indexed +className +▁ober +▁duas +▁optimized +▁kdy +versary +energy +▁центра +▁currency +zyż +Like +▁Ги +sono +▁palab +▁pushing +ublik +▁Hass +}\,\ +unker +▁Factory +▁Resources +datei +▁Tools +▁stehen +sime +▁Ху +▁hoch +▁Rodríguez +zeitig +▁Terry +▁обу +Usage +urchase +lö +▁Introduction +▁participation +ος +ogli +apy +▁hopefully +ponder +▁Yang +▁promises +▁верну +▁остров +^{+ +▁mostra +▁CURLOPT +HH +▁stdout +▁brilliant +▁manuscript +▁decir +▁Bolog +▁места +▁invisible +▁Chal +▁analyze +prilis +attend +Mvc +than +cko +▁Quebec +▁planta +▁télévis +▁uninstall +ències +▁gminie +▁Pref +▁lequel +Invocation +▁Í +▁transformed +MAN +gebaut +▁сохра +▁второй +▁Lith +wendung +▁Politik +▁Senator +▁LL +ждение +ште +▁Cés +▁bande +▁historian +▁passwords +malloc +▁semif +▁rå +unicí +Available +Optional +▁Twe +▁kró +▁subsets +▁DAT +▁doubles +никами +▁зв +gegeben +▁Попис +▁július +▁meteor +Mount +ivent +▁Nathan +▁Schutz +egov +▁död +▁meat +▁пункт +▁minds +elivery +▁TLS +рем +ckså +▁stayed +▁Bin +▁Pia +▁имен +▁Bobby +▁produit +empio 
+▁reducing +▁Yu +▁Geschäft +▁perché +▁cors +▁icons +AppData +▁Hog +▁рів +▁Sans +▁siège +stellen +Brush +OFF +▁visitor +▁bath +▁fee +atisf +▁curv +▁folgender +▁conscience +▁Seattle +▁medieval +distribution +▁DM +▁мя +▁RUN +akov +ceil +▁letting +▁dov +▁оби +kiej +▁direkt +▁tm +colors +▁altro +▁tijdens +]{' +▁Bom +▁kunst +▁shelter +▁rav +predict +▁comenzó +▁świat +▁Durant +▁schemes +▁mesh +▁indicator +▁Emer +▁guilty +нец +▁consequences +cludes +▁Lower +▁поме +▁pace +даго +▁ambos +lb +▁educated +urale +anh +esség +▁associations +town +▁trif +samples +bos +▁Spect +▁Це +altung +▁Lob +▁curiosity +▁Weiter +estone +▁demol +▁apolog +▁Dynamic +Inner +esper +ecz +uellement +▁Hamiltonian +Atlas +▁argue +Foreign +collapse +▁términ +▁electronic +▁NR +▁corr +temps +IndexPath +яз +▁talál +today +wave +▁sib +▁спи +▁convey +▁Géographie +▁Нью +▁Hibernate +▁tin +dic +ppings +sweise +▁rolling +▁selects +)\) +▁poeta +▁степени +▁Abr +▁höch +▁stern +▁fjär +▁installer +decl +▁miser +groupby +substr +▁phenomen +▁Wing +▁fills +▁único +Running +Come +irable +simeq +▁remp +kele +liers +▁kwietnia +▁interrupted +▁Jet +=\{ +ído +▁Taiwan +▁возра +▁alternatives +▁Tir +▁Reserve +▁Кур +▁Nobel +▁работал +▁axes +▁Cependant +ká +▁erneut +▁Demo +communic +constructor +▁Monday +Nil +HashMap +payment +▁fixing +▁ADD +review +▁possibil +▁grote +▁grouped +▁Lima +▁Augen +▁också +onas +▁debate +▁Ingl +Da +SOUR +ettbe +▁Battalion +▁Float +▁cone +readsheet +court +ligen +▁Beginn +▁LIMIT +▁enjoyed +▁Jakob +▁telt +backend +▁Gemeinsame +lint +alling +▁bör +grand +▁diverses +▁związ +▁Kompon +▁innerhalb +▁desarrollo +▁Masters +ioso +]`. 
+▁francesa +Aff +inek +▁dessin +`.` +▁ranks +берг +▁skal +▁Sultan +АН +▁способ +▁contradict +▁recom +▁Oklahoma +▁Vladimir +▁meters +transport +▁consulté +▁ATP +ebb +▁volunte +▁outline +LIC +▁euro +CharField +medium +▁Belgique +Proc +routes +▁contribu +!} +ším +▁Less +▁Kost +▁eredetiből +reven +verify +▁Salt +▁shooting +▁dispose +ují +▁tierra +▁poison +sak +perimental +▁Né +▁Kid +agyar +▁archiválva +bereich +íz +▁Ritter +▁Хронологија +zeum +дах +▁gründ +▁programmer +▁conseil +▁encrypt +integration +Culture +▁Circle +Observable +▁genomsnitt +▁Selection +▁irregular +Autres +Percent +fault +▁virtue +ąpi +▁sess +▁Также +Timestamp +▁littérature +▁moż +▁borrow +▁conced +чник +▁Lund +IONS +ynie +▁Shin +▁osob +bě +▁intuit +▁нап +▁proph +▁pitt +▁IBM +▁Till +▁hina +ittest +generator +▁Nin +▁Kot +▁passer +▁disposition +uning +▁fame +▁tenia +ancement +▁Suisse +`- +▁hombres +▁infinity +▁оконча +▁cosm +▁Dennis +baz +haupt +▁mighty +▁prede +usable +▁wszyst +▁lb +ABASE +jna +нев +▁ases +▁finalmente +йм +pection +▁Studien +▁Norwegian +cego +INDEX +orten +▁friendship +metro +thick +▁Zel +LOW +▁thereby +unted +▁surfaces +ющим +%). 
+▁Wonder +▁redundant +▁Gros +▁websites +▁vio +▁ocas +vés +▁Gam +dw +Indicator +▁Kob +▁jack +Hint +▁Apol +▁другие +▁NUM +▁ofic +ystycz +▁wereld +мости +LEFT +▁Types +seen +uncia +▁narod +▁этот +Sidenote +ueil +▁отме +▁courts +fir +urz +ченко +Credentials +▁imagination +itats +buff +flash +▁badly +▁worn +▁округу +catalog +lime +▁Gill +▁Sent +iella +▁Craig +▁Sele +▁Independ +▁provincie +ossen +▁запад +▁infant +▁prevents +▁provinces +afé +beg +▁colours +BF +ën +▁Между +în +Observer +forsch +ígen +umption +▁Illustr +рист +▁полови +▁`& +▁ore +▁supplies +▁parenthes +Foundation +▁vou +▁Tout +Donald +▁RET +weig +▁producción +mix +▁utwor +▁föl +▁então +▁Sister +Tags +▁Савезне +▁privileges +▁nazw +▁Rav +▁repro +▁Mason +▁Platform +▁пробле +▁Pérez +▁blanc +Behavior +фици +eken +▁meets +(.* +▁få +epen +maker +▁loyal +members +meisterschaft +goal +шлен +▁северо +iende +дні +Proof +▁explic +▁electro +iels +reload +▁eleven +▁partidos +îne +▁Regin +▁éx +▁Bulg +▁networking +▁separator +UserName +▁edificio +▁Mie +▁idle +yed +▁passengers ++) +meno +eggi +▁nicely +endencia +чий +étés +ightarrow +▁orthogonal +▁Half +▁fewer +▁propi +▁primit +icale +▁flower +merk +▁Отече +▁persistent +▁Ville +Men +gaben +▁Isaac +ativity +▁północ +▁rok +cards +дения +▁юго +▁extraordinary +▁kyr +(", +))] +▁unix +кол +▁sink +apsed +▁kommen +▁forcing +About +▁Halle +▁Majesty +▁Switch +▁abroad +▁acceleration +urbed +▁остан +Ready +▁півні +Bra +▁цього +▁plut +▁Train +▁április +▁puesto +▁toss +▁irrelevant +▁dip +segment +opacity +▁lorsque +▁verschill +ена +▁Doc +%%%%%%%% +▁borders +gebras +▁ries +▁Olympedia +▁Generation +metros +▁horizon +▁adaptation +▁Zahl +▁nahe +▁Bug +Picture +љи +RGB +Owner +adin +▁Catalunya +ných +▁cualquier +▁Institution +insen +▁Brasile +▁fitting +Deleg +ictwo +▁Exper +ochastic +▁dus +▁пора +▁substring +ссии +oin +▁школа +▁cx +▁%) +▁Buddh +▁pending +▁Entry +▁Berl +▁cler +▁Soc +▁rounded +▁mv +ített +▁Diplom +▁französischen +▁Gan +▁Investig +▁indexPath +▁molti +persistence +▁XIXe +▁Electron 
+bü +gele +▁Maler +▁proyecto +▁Bath +ellers +▁GP +oning +cloudflare +▁při +▁ded +▁Odkazy +▁Msg +▁Being +▁Depuis +▁Primary +▁Appro +▁formally +ступил +▁fuera +▁Root +▁autonom +▁secretary +▁osób +▁cuales +▁Depending +▁asi +vera +▁russe +▁proves +▁presiden +RU +▁Watson +▁webpack +elligence +кам +▁Officer +▁delivery +ждён +▁импе +▁wil +▁vesc +usztus +▁Geoff +()} +▁Fore +▁wenig +▁Airl +▁Efter +▁Break +▁Städ +ismiss +íp +▁avoided +▁assertion +DN +▁teat +ína +▁mechanical +isu +@{ +▁nou +Italie +sourceforge +▁svo +▁király +▁References +six +▁Archives +▁finishing +acje +état +iffs +▁stead +▁feas +aware +lande +Inject +▁Agent +▁Normdatei +▁amen +▁Architecture +aze +ște +▁usar +▁cores +лін +▁Castro +▁væ +>", +omena +▁gesam +▁Martín +egung +▁společ +▁amplitude +▁importing +▁listview +THE +ziale +cedes +▁particulier +▁Расподела +▁край +▁divent +▁ké +quit +тором +CheckBox +▁Zobacz +phe +pta +▁sjö +▁розташ +▁tedesco +▁stal +▁Beruf +овая +▁svě +▁flush +▁відбу +▁radial +▁différentes +анта +▁Perry +Coll +liqu +▁Optional +▁Санкт +▁LINQ +▁Franc +cije +▁Guillaume +know +▁Units +olk +▁Système +▁Sales +▁ehemaligen +мирова +xhtml +setopt +▁mellan +▁zie +▁giant +Board +▁Caval +▁defence +---------- +pshire +mart +▁Dioc +iskt +▁inse +▁épisode +чик +bars +Sito +▁integrity +auff +▁vär +Azure +▁starb +▁контра +▁Мексичка +▁запа +▁Mountains +}}= +▁pulling +▁satellite +▁atoms +▁profesor +▁repeatedly +▁invasion +programming +├── +▁Lip +вшие +▁keen +▁critics +▁Nicola +▁Cand +▁distint +▁heading +pragma +{| +ymen +▁terrain +iedenis +▁besonders +▁nominated +BOOL +▁Kay +cian +stelle +▁dispute +▁щ +DataSet +nothing +Autom +hören +▁shed +▁paused +san +▁nunca +!(" +▁położ +Secret +▁Domain +▁возмож +XV +lv +ikh +▁Sony +mq +otrop +▁Logger +▁threat +asted +зько +▁freely +▁improvements +istema +▁illustrate +▁tact +▁figur +ués +riminal +odon +intendo +▁influenced +FFER +▁Ghost +▁совер +nad +ioned +▁Events +▁wrapping +---------+ +fif +▁(** +={{ +маль +▁losses +▁Galerie +tel +▁лютого +▁Kru +▁Polen +нім +near 
+▁shame +▁moyenne +▁CP +preis +▁passenger +lek +ionales +kafka +▁participe +▁membership +[_ +lando +stelling +Sem +gon +▁Correct +▁valle +▁readily +▁Dokument +honneur +▁testim +ulative +doFilter +▁dominant +ammer +▁која +▁Monsieur +zeg +▁війни +▁Fo +▁Amy +▁¡ +▁február +▁downloading +▁leng +\}$, +▁neat +▁Cache +ICATION +▁deve +▁sorrow +slow +▁hinaus +▁reconoc +▁Linked +▁Shaw +market +▁Dic +▁Ski +▁delimiter +▁MainActivity +▁Musical +▁Reyn +ScrollView +▁conventional +ença +▁refactor +'- +▁Hed +sprech +▁athlet +▁especies +▁Schön +▁kleinen +шко +▁Йо +▁Happy +multirow +▁augusti +▁Gand +▁appointment +▁Mediabestanden +Three +▁Kenneth +NEW +▁Notification +▁Marx +▁insc +Mor +вый +väst +vidia +▁demonstrated +fonts +▁kamen +▁Ster +▁mieszkańców +▁Koh +~$\ +»). +rene +insic +ická +xygen +▁mn +▁sched +ASC +Ig +▁Constant +▁opportun +▁MyClass +sef +oped +▁injured +VIS +▁Pero +▁Until +▁flesh +orphism +▁Portal +▁gminy +▁власти +▁Nä +ктиче +▁hrab +▁Cub +avoir +▁Lars +▁Бело +▁seizoen +▁Genomsnitt +▁Lil +▁Pool +▁Dios +TX +aes +autore +Alpha +states +Lab +nederbörd +erton +▁brid +▁richt +▁Ela +▁сла +▁weapon +▁combatt +agar +▁regnig +▁utilisé +▁servir +▁brick +▁gateway +▁torraste +▁procedures +▁årsnederbörd +▁Genomsnittlig +чёт +▁områ +▁regnigaste +▁честь +▁amid +▁grateful +▁DIS +DAY +▁ору +▁rivière +heure +▁Richmond +▁Compar +▁Нор +DOC +esia +calc +▁IU +▁vorg +▁habían +çoit +▁arist +▁кли +▁Sue +▁Touch +▁Writing +ifiable +▁wc +▁withdraw +зар +▁presently +▁FK +▁prakt +▁colored +usb +▁Perú +▁plata +▁wishes +▁кам +azar +ável +▁lamp +bishop +▁inclusion +jq +arth +▁Flag +▁нор +ædia +UNCTION +▁Bahnhof +▁approaching +▁Gött +▁cube +▁argued +▁Things +Gui +дови +▁recre +▁réseau +▁significa +Git +gebracht +▁liga +▁assured +alus +рит +▁энциклопеди +▁%). 
+▁Première +▁declarations +▁tricky +▁profiles +▁Fon +▁Jas +âr +babel +▁Friday +▁június +▁cols +▁EXISTS +▁Italiana +▁authorization +▁sulle +▁Emb +▁Variable +trees +▁Fly +riors +▁damals +▁findet +▁Sept +▁mundial +▁removal +▁longitude +clic +▁fade +▁gradle +▁zák +▁timing +trightarrow +atia +-. +uche +▁serialize +▁Hmm +▁Representatives +bah +rend +assador +▁shield +ucion +▁américaine +zę +villa +▁hombre +áss +▁SF +▁repeating +▁criter +▁Struct +??? +▁cheap +▁rings +abhäng +▁corte +▁administ +ixon +gypt +▁puntos +▁mezi +▁pochod +isko +nię +▁осу +▁ár +тельной +▁Metropolitan +jin +zess +▁віці +▁conflicts +ijst +▁Market +стров +▁"," +▁Scroll +gun +тара +▁amateur +▁róż +poss +▁generalized +▁Harm +cita +▁Switzerland +icola +▁muit +located +▁có +▁arose +▁communauté +})^ +visibility +ída +▁FB +▁Freund +gat +":{" +intellij +ifie +hmen +▁édition +▁које +▁інших +oming +▁arquitect +▁Presidente +▁Під +▁cabin +Theorem +▁Gay +ifice +▁hect +lą +irmingham +▁semantic +▁Louisiana +▁sacrifice +▁Christoph +▁Executive +_+ +ják +▁seria +▁Overflow +▁Lucy +▁melhor +▁voices +cza +▁капи +▁университета +INCT +▁coloc +▁prue +▁geomet +▁diretto +reso +▁Akt +▁unh +▁сери +▁Alert +Wel +audi +äler +▁guests +▁иде +Studio +▁кате +▁exponent +rze +pmod +rolle +▁Limited +Allemagne +▁pity +▁lä +▁runner +kende +EQ +▁MM +szág +поді +▁regret +▁publié +▁departamento +▁accused +hp +▁Pfl +▁Sint +▁ekonom +ractor +▁Пів +▁awful +ować +]-> +▁Fine +Са +tis +éta +▁Роди +▁Düsseldorf +LOB +osas +werke +▁lance +▁листопада +▁incomplete +▁Picture +('\ +esters +▁belonged +▁Sank +ammed +▁repositories +▁addr +Collect +Hot +▁tyl +▁instanceof +▁bonus +ový +▁моря +▁interactive +▁Mys +▁Edmund +fileName +emor +▁Три +▁Rosen +▁Prima +▁voting +▁XP +▁Zero +▁Led +amsung +▁enables +▁redirects +AST +Paint +acker +lecht +▁chairman +▁Aven +▁Sach +("< +кер +▁mistakes +▁Weit +▁prowad +▁didnt +énario +unless +▁backwards +boa +duino +``` +stor +Completion +puesta +▁dinast +últ +▁SY +ifolia +œuvres +▁racing +▁cabinet +▁cutting +▁thumb +▁Кара 
+highlight +куп +▁sd +▁національ +▁campagne +▁registers +▁educational +▁pesar +üge +▁oro +burgo +▁Athletics +▁MTV +getMessage +▁Hyp +▁victim +))\ +▁drums +hostname +tał +making +▁powiat +őd +threads +▁absolv +▁люди +▁stepped +exist +▁NK +▁ves +istiche +%' +ativos +▁такой +▁MongoDB +▁Ung +▁Рус +▁elim +▁Fif +icación +▁Tennis +▁Jefferson +ján +fog +anha +zor +▁університе +ahu +iada +Sdk +Setting +▁Kill +▁Wend +▁bald +▁Kub +▁visto +▁jeunes +collections +ací +вропей +▁arise +оні +MAIN +доступ +▁berg +▁criticism +▁Torre +▁descript +ières +▁estudio +▁ili +▁militare +▁Clara +▁Ellen +limited +лм +▁Españ +▁infinitely +America +ouc +glass +▁rud +▁zat +▁rin +▁Bibliografía +▁merchant +tensorflow +▁dér +▁ActiveRecord +IES +▁linker +▁estudios +cdnjs +▁Государ +ánchez +appe +club +▁další +▁Algorithm +dfs +▁Bac +▁кафе +▁&=\ +▁ат +▁Глав +▁Mou +Machine +(...) +▁compart +▁augusztus +avan +▁rolled +▁еди +Scan +▁регі +▁świata +▁mines +},{ +▁Tier +Cannot +мін +▁NEW +▁Вол +▁Manh +▁Gregory +▁principe +ISO +prog +▁Fail +▁aa +▁fecha +▁WCF +▁magistr +▁Zach +▁unicode +▁converter +▁dispers +ksam +▁Uncle +PropertyChanged +▁lider +▁opts +▁там +locked +zak +▁counted +▁persone +▁hurried +ätter +▁outras +▁genu +BD +veg +due +▁Pract +▁posible +▁contribute +UMN +▁Bürger +▁wars +▁exhibition +hill +▁astr +▁музе +▁CASE +manifest +yellow +Fn +▁RC +▁sott +▁sujet +▁Socket +▁Chine +▁frameworks +Hold +êts +▁філь +Loaded +ophe +texte +▁expres +▁consume +▁Richtung +ografi +▁magnific +àt +▁indul +ryty +▁offici +▁assault +rund +▁variants +▁сельсов +▁excitement +Times +kotlin +▁gering +▁Engel +▁Timer +²). 
+▁Ng +ässt +schau +SError +▁Edwards +▁Terminal +lict +Under +▁spawn +ürgen +▁Außerdem +▁kitchen +fahrt +▁Colors +▁система +▁terminated +▁LaTeX +igkeiten +▁mesure +▁Amts +▁empir +▁striking +▁exclusive +тех +▁rez +▁quan +▁Glasgow +▁lecture +▁Testament +▁funds +▁stessa +▁tribes +▁parfois +▁treball +nitz +bove +▁заслу +▁absent +▁Lauf +Smith +▁Николай +▁européenne +lr +▁programma +▁midst +▁daughters +Syn +oben +ână +idan +▁ther +odore +sdl +▁Quint +▁casos +▁Zam +▁страны +▁sprite +кал +▁nasc +▁сотруд +▁trava +▁хозяй +▁Uruguay +▁sparse +▁поле +▁mystery +▁Mang +registr +▁CGFloat +▁submission +вана +▁": +▁Traceback +▁Pit +▁Ehr +▁сра +▁Graphics +Updated +▁svensk +▁spacing +tritt +▁Guinea +▁França +Associ +▁Tová +stab +▁Learning +▁Bright +śc +▁idő +}}_{\ +▁droite +▁raising +getting +ythm +onyme +żs +▁blah +TagName +Vertical +▁aper +postgresql +▁Handle +zew +▁skulle +▁opere +layers +▁possono +▁relate +ąc +▁Mih +âge +▁Świ +isses +▁servlet +Los +▁Advanced +atica +▁ced +▁elementos +рона +iks +arf +ariat +Mobile +agua +▁timp +▁Comité +▁combining +wohl +▁Study +coordinate +▁recommendation +▁transformations +until +bounded +▁изу +hanced +▁вопро +▁Prés +▁coord +xty +▁$, +▁champions +Den +Mil +(', +▁Preis +▁eigh +▁markers +▁gewesen +ätten +▁pione +mv +▁ју +zeichnis +hoff +News +▁Stanisław +▁Brandenburg +▁Feuer +=& +жет +▁Neil +▁wirk +▁società +▁spare +▁civile +sprach +▁disse +▁gates +▁anom +▁Федерации +▁tib +▁fútbol +▁Wikiped +iate +Front +▁craw +▁Rak +▁зву +street +▁Agency +вало +▁Рас +▁mkdir +ację +▁shares +Story +▁remarks +▁keywords +Bob +▁toe +▁Vitt +▁rhs +ROP +oris +/@ +сии +▁traverse +▁referencing +präsident +rong +'): +aties +AW +Outlet +▁évol +ikes +▁environmental +icum +▁Lied +▁warn +▁Butler +▁%), +▁Zeitschrift +▁Montr +важа +▁Mercur +jekte +meter +ducation +▁attributed +*$ +▁unf +▁Vertrag +zien +▁Роб +lices +pply +ansen +▁zeit +▁immense +▁lutego +▁Bulgar +▁miembros +▁Националь +▁Allow +▁anglès +дви +▁Toy +туа +▁yard +(% +isser +▁golf +▁Ukrain +▁hosp +Include +▁Lisa +▁csal 
+▁Mira +recogn +▁Ке +▁hitting +кономі +▁Tournament +LOAD +▁Guardian +▁daher +▁timezone +▁tomcat +▁successor +▁Void +▁começ +▁converts +ächs +osex +xelles +aser +▁És +▁mou +▁ung +▁origen +▁Crow +▁Erd +▁sieben +lua +▁BB +RENT +▁piłkar +▁marque +▁Labour +viders +▁exempl +Sound +▁Wass +arrison +▁течение +▁Oficina +▁Daw +▁Kauf +ént +éső +▁=" +▁kat +diction +▁Voll +▁highway +James +zeuge +▁modelo +Throw +▁Forum +("@ +▁enfer +▁специаль +Numbers +▁Binary +▁Martínez +▁Stato +▁festiv +▁katol +▁Аб +▁limitation +▁STR +▁Официаль +ipes +▁Isn +▁ruled +▁cí +geber +▁lavoro +▁parentheses +оз +▁équipes +▁efficiently +▁Period +▁Regarding +leaf +▁similarity +▁gesture +datab +▁terminate +▁semantics +▁Alo +▁cig +▁OpenGL +▁heutigen +xaml +▁frequencies +)}. +▁threatened +тик +▁calcio +▁Riemann +slug +▁Finale +LR +▁Derby +▁още +▁deviation +ächen +▁Cris +ново +▁столі +▁relev +▁splendid +▁учё +erving +gable +▁générale +pom +▁Cheers +▁imprison +▁indent +▁analyz +▁revert +érer +▁phases +FirstName +▁mig +▁disturb +▁mixture +▁){ +inture +▁Tried +▁sooner +▁pels +▁établ +etro +itie +▁quartier +▁гово +▁város +ufe +heten +хом +▁soap +utors +▁duch +syntax +▁tribe +▁chante +Tri +▁Mate +quality +uola +=". 
+chk +▁всі +▁przeci +▁Meteor +▁scattered +Plus +trad +▁stackoverflow +▁retra +▁éditions +▁sain +cribe +ignon +ucker +▁мало +▁tenir +▁exports +▁auxili +▁]] +▁CBS +uniform +▁periodic +agrant +▁emple +Wil +▁fres +▁strutt +▁світ +▁betre +▁объек +тися +▁bisher +baum +ishi +▁Gazette +backgroundColor +jl +▁fiel +▁према +▁protagonista +▁Muhammad +▁simulate +▁Hook +fest +▁своих +Sender +▁listened +жі +jest +kord +Choice +▁hoofd +reducible +hpp +▁Wu +ši +▁Marse +▁soir +westen +emos +▁Duc +▁amerik +|}{ +▁Gul +▁Sprache +▁mismatch +Scal +Pixel +EF +▁Sep +▁powiecie +urk +▁Napoli +▁neighbourhood +стоян +▁searches +yrus +пет +Help +pont +▁Orient +▁Alfonso +▁monitoring +iao +édé +▁César +шее +Shift +suit +coded +ното +▁Parti +▁lasci +▁awesome +usta +▁Сове +▁Fland +oom +▁devi +engelsk +endum +▁Pascal +▁Bind +▁siguientes +JB +▁Petersburg +▁incorrectly +▁Bash +▁pelos +▁zespo +NSURL +▁přek +▁Crime +nach +▁thrust +▁Cultura +WF +▁Solo +▁invas +▁individually +ibm +▁etapa +▁handed +▁wherever +▁interpolation +▁musée +▁CNN +idia +ństw +▁przew +ughing +▁actors +▁Oriental +▁convenience +▁miasta +brains +▁меся +▁infatti +▁AllMovie +▁critique +▁successo +ancouver +▁fá +ългар +▁wisdom +▁Phoenix +hole +▁información +▁Airlines +.« +mort +userId +▁*/ +▁Congo +▁"` +corr +▁problemas +▁bib +▁później +▁fileName +zott +macht +▁Ulrich +Cy +endpoint +▁sheep +▁ibn +Feed +▁sympathy +▁Ib +▁territorial +rating +дами +▁dst +ую +aho +▁sug +emia +▁ted +▁Api +▁Rica +▁MR +ńskim +▁Voor +▁devil +▁Фо +▁När +▁...) +▁vois +▁abbre +▁Männer +ximo +▁intellectual +▁tales +similar +neum +▁Orig +▁postal +▁hvor +▁identification +▁Од +uesto +▁../ +▁bir +▁Лон +▁esempio +▁Eing +Expand +▁PRIMARY +▁Jin +▁však +ourses +▁Betty +▁WM +▁flask +hlen +▁Adel +laravel +▁дет +ською +▁Mundo +iczn +ifié +▁Мор +▁древ +DateFormat +ським +▁dated +коли +▁результате +\). +▁delayed +sound +▁Мак +▁"... 
+▁binnen +▁факуль +▁polygon +▁eggs +AtIndexPath +менталь +▁incred +chunk +webdriver +▁свобо +▁między +Received +▁Monde +▁JQuery +Butt +▁PDO +▁forec +▁discipline +chev +нат +▁redis +▁hunting +▁alk +▁proofs +PRI +▁chip +ésie +▁HO +▁rug +zos +▁sorte +▁zeigt +▁Physics +legte +▁proportional +▁toolbar +vement +notin +▁první +blah +▁présence +▁lloc +▁líder +▁Accept +▁Always +▁"{ +▁diversi +ikor +Period +жён +▁Alliance +▁relay +Bro +jön +▁Baud +▁Bian +')[ +чив +▁Poss +▁Mitglieder +▁nev +Daniel +▁tends +▁compagnie +▁livres +lub +▁ +e +t +a +i +n +o +r +s +l +d +h +c +u +m +p +g +f +. +b +y +, +w +v +k +1 +) +( +- +0 +: +I +S +о +\ +2 +C +" +A +а +T +{ +} +/ +' +x +и +_ +е +z +н += +E +M +P +j +р +D +9 +* +L +т +B +R +с +; +# +$ +q +N +3 +в +F +л +5 +4 +8 +é +O +H +к +` +6 +G +7 +W +д +> +м +у +[ +] +V +п +U +< +J +K +г +я +і +з +? ++ +б +á +й +ь +Y +ó +ч +ы +í +Q +^ +ä +& +х +| +X +! +@ +ü +– +% +ц +ö +ж +Z +è +à +ш +— + +ю +ł +» +С +« +’ +ф +В +П +К +“ +ј +М +А +ç +å +щ +~ +ę +” +ą +č +Р +ї +Н +ú +Б +Д +ã +ß +ă +ě +ê +О +š +Г +Т +ż +ё +ž +ś +ñ +ř +ő +„ +Л +э +ý +У +И +ъ +є +â +î +ò +З +Ф +É +ć +· +ș +ń +ț +Х +ô +Е +ù +ů +° +Ш +љ +Ч +ø +æ +њ +  +  +Э +ë +õ +ï +‘ +† +² +ű +І +─ +Ц +ћ +Ö +û +Я +ì +… +ō +Ж +Ю +Á +́ +Ü +º +œ +ā +Č +ź +α +│ +ا +À +═ +Š +ђ +№ +  +• +− +→ +× +ο +₂ +Ä +Î +Ś +đ +Å +ı +‎ +ū +ν +Й +ª +ι +τ +ل +′ +� +È +λ + +Ž +ς +ň +ρ +₁ +Є +ī +ε +§ +Ł +Ј +£ +ر +Ż +¿ +م +″ +Ú +ن +ي +σ +´ +​ +μ +³ +ş +π +و +د +κ +₃ +Í +ˈ +ب +Ó +à +¡ +€ +ť +η +ə +ー +Щ +β +├ +ð +ґ +­ +υ +¹ +₄ +ت +י +γ +س +の +ğ +δ +ی +ン +ه +ו +ω +ί +█ +θ +的 +© + +↑ +, +ː +ά +― +ع +Ç +₀ +± +Ø +ď +Ř +Œ +½ +└ +ό +‚ +ē +₅ +Æ +Ș +ɛ +ה +ר +φ +₆ +ė +ح +ف +ة +İ +  +← +║ +ɔ +≤ +ל +Đ +ա +Ō +א +് +ス +ش +大 +ル +џ +イ +⟩ +  +µ +∈ +ق +⟨ +。 +Ґ +ा +ج +ʿ +ა +έ +χ +中 +ב +ი +₈ +ト +ή +ラ +Џ +ك +₇ +מ +ת +一 +Π +า +・ +Σ +Α +Δ +ש +ز +् +ร +い +ʻ +Њ +₉ +ʼ +リ +‐ +ク +∞ +⁄ +ύ +Ş +ア +Ε +ɪ +人 +Κ +∀ +र +ッ +► +子 +¬ +خ +◄ +َ +ע +日 +し +ḥ +נ +山 +、 +Ї +る +文 +Ñ +ド +ד +ն +Ђ +Γ +þ +’ +® +ک +“ +⚭ +本 +ℕ +น +ѝ +̶ +อ +ў +に +数 +ე +国 +Ω +  +ǎ +ص +” +Μ + 
 +と +⁠ +た +ط +ր +タ +ÿ +な +أ +シ +新 +﹕ +ʃ +ľ +ロ +⁴ +் +⇒ +ţ +: +Ț +ക +≥ +ി +マ +ん +ṣ +ジ +是 +이 +⋅ +田 +を +道 +ง +¨ +ـ +เ +村 +Ê +ם +› +用 +ώ +天 +) +་ +镇 +か +不 +Τ +学 +ư +有 +ո +( +レ +گ +‏ +フ +न +ก +ɑ +す +ח +上 +‌ +∧ +ṭ +ק +ξ +¤ +ि +会 +ന +カ +ų +ま +ു +͡ +क +া +小 +ן +行 +は +ʁ +Ő +Þ +り +キ +Λ +რ +三 +が +コ +ζ +市 +王 +ℝ +Ź +う +て +区 +ാ +‚ +年 +פ +ի +ſ +‹ +त +ŏ +‑ +̃ +Ć +ى +「 +」 +ს +Ā +म +生 +≠ +Љ +स +↔ +Ο +ว +ლ +成 +定 +ล +¶ +כ +で +ּ +ม +个 +和 +ס +在 +Β +ิ +Ι +⁵ +ั +ɡ +━ +ら +オ +¼ +ե +バ +ָ +ŋ +ŭ +グ +⁶ +Ь +⁰ +方 +บ +— +高 +ệ +Ν +ѣ +ィ +地 +月 +Ô +™ +ウ +き +公 +ạ +ო +ɾ +่ +出 +法 +Θ +ส +名 +ย +ത +Φ +↓ +れ +ג +Ё +ơ +下 +ә +ψ +┼ +ャ +√ +¥ +社 +ṇ +さ +ِ +く +े +Ы +ἐ +テ +为 +乡 +川 +ナ +之 +字 +ム +ी +海 +ブ +≈ +! +پ +¯ +ἀ +ƒ +こ +ְ +東 +明 +ὶ +时 +ท +ɨ +デ +️ +ʊ +エ +南 +西 +ल +メ +プ +平 +式 +ῖ +қ +व +غ +Ò +家 +ʒ +サ +≡ +ダ +ต +∃ +₹ +प +第 +ര +ض +▄ +城 +ミ +ɐ +¦ +美 +件 +ნ +Ð +ַ +ニ +部 +ņ +ǐ +ט +य +あ +¾ +ả +ち +ュ +÷ +女 +神 +♦ +¢ +以 +้ +র +太 +্ +チ +յ +前 +金 +ւ +野 +北 +ห +‰ +っ +加 +原 +ʲ +置 +安 +ガ +我 +Ḥ +യ +京 +▀ +მ +ვ +ʾ +∨ +ִ +可 +取 +县 +二 +▒ +理 +自 +信 +代 +ี +צ +် +द +⁸ +̯ +お +要 +ῦ +க +ễ +ु +ƒ +ʰ +化 +✓ +പ +의 +다 +木 +ُ +̀ +ˌ +ह +パ +水 +ế +ด +ズ +⁹ +島 +‍ +も +正 +■ +آ +พ +内 +Ì +ǔ +┬ +作 +合 +ὸ +み +▼ +ῶ +⊙ +~ +ị +ْ +回 +了 +所 +事 +表 +ำ +分 +⁷ +ү +€ +入 +全 +إ +里 +Χ +ं +ハ +ค +⁻ +モ +郎 +据 +● +州 +∩ +者 +通 +都 +ℤ +♭ +╌ +つ +ḍ +江 +ז +Ý +ө +์ +到 +ி +ʂ +对 +스 +使 +ি +よ +Ἀ +Ï +∘ +사 +ন +世 +ɕ +կ +უ +ട +ბ +ो +വ +果 +十 +ุ +藤 +来 +面 +け +ĕ +ビ +这 +지 +ം +街 +石 +能 +空 +տ +ئ +武 +ʹ +ϕ +后 +ะ +元 +ʔ +리 +기 +河 +町 +花 +ὐ +类 +░ +物 +Η +¸ +ு +თ +ث +െ +╠ +⊆ +》 +ツ +版 +动 +如 +真 +ɲ +号 +ذ +정 +林 +書 +民 +口 +ّ +示 +മ +아 +图 +∪ +戦 +李 +ല +《 +光 +白 +心 +த +ज +设 +ί +路 +ग +∥ +한 +最 +Ћ +手 +ս +? 
+型 +ầ +セ +建 +ェ +主 +시 +대 +ῆ +‡ +集 +დ +目 +Ρ +ァ +度 +長 +星 +ノ +ộ +가 +五 +چ +로 +ョ +重 +于 +发 +史 +ظ +ช +え +國 +ĭ +ப +인 +你 +駅 +‒ +♥ +多 +ħ +Қ +ồ +士 +四 +┴ +ம +司 +ে +ὰ +∂ +╬ +次 +Ľ +⟶ +立 +点 +音 +⠀ +器 +하 +井 +存 +ֹ +当 +Ë +★ +寺 +性 +也 +め +だ +位 +ങ +ہ +值 +古 +გ +ব +院 +േ +▶ +ர +界 +語 +സ +수 +ǒ +愛 +✔ +時 +ọ +റ +մ +ケ +东 +同 +주 +保 +Õ +ố +ἰ +青 +ゴ +体 +清 +相 +จ +ء +情 +𝕜 +ক +ḫ +ờ +将 +族 +동 +Υ +┌ +ボ +宮 +』 +ম +『 +ļ +श +ป +Ա +ब +자 +政 +ா +间 +fi +松 +ṃ +始 +息 +少 +教 +获 +列 +开 +ტ +ワ +კ +科 +春 +治 +吉 +ས +ศ +ɒ +台 +ネ +း +ĩ +工 +ά +知 +八 +場 +画 +百 +☆ +記 +得 +ソ +氏 +ာ +에 +ল +ṛ +关 +ġ +έ +∑ +ベ +标 +니 +ὴ +ֵ +外 +♠ +わ +間 +ภ +校 +制 +แ +力 +門 +好 +ғ +Ù +ℓ +ֶ +는 +┐ +∗ +指 +色 +返 +馬 +请 +≫ +風 +ό +接 +서 +↳ +せ +志 +̲ +魔 +ң +更 +程 +김 +郡 +ོ +ũ +ച +利 +県 +周 +そ +や +谷 +香 +♯ +じ +، +期 +∅ +┘ +初 +福 +片 +ザ +動 +参 +성 +Ə +╦ +어 +ხ +義 +च +象 +功 +♂ +도 +고 +过 +վ +皇 +特 +ậ +长 +英 +ấ +ണ +Ъ +স +其 +ত +流 +除 +일 +ু +្ +永 +直 +상 +千 +ắ +館 +Ť +朝 +ட +ɣ +单 +ʀ +格 +德 +전 +☺ +ピ +歌 +进 +限 +夫 +트 +⊢ +園 +量 +土 +放 +码 +等 +系 +∼ +華 +↵ +소 +常 +否 +見 +源 +ׁ +实 +博 +라 +원 +보 +⊕ +解 +〜 +男 +দ +ポ +ろ +나 +ག +無 +Û +̥ +ұ +查 +̣ +╗ +╩ +条 +য +ὁ +後 +他 +网 +ல +≃ +화 +ە +阿 +ေ +户 +∫ +구 +ར +မ +▸ +լ +○ +命 +就 +龍 +君 +夏 + +言 +先 +➜ +შ +ძ +ਾ +வ +ど +ヒ +ไ +ன +ば +ギ +գ +ἄ +ヤ +典 +府 +̄ +신 +组 +改 +ὲ +华 +与 +调 +╝ +ヴ +ქ +由 +修 +學 +♣ +消 +符 +ʌ +부 +ớ +‾ +▲ +录 +ള +연 +을 +ひ +영 +┤ +已 +陽 +င +국 +容 +未 +宗 +ᴇ +び +장 +龙 +් +提 +ĝ +六 +形 +제 +Հ +伊 +ϵ +ข +Ű +ゃ +火 +Ṣ +佐 +⊥ +̪ +ứ +□ +结 +九 +雄 +թ +ា +而 +བ +우 +张 +ट +ष +向 +ῥ +选 +공 +ゲ +ʐ +仁 +堂 +ך +ု +ἔ +അ +ề +ད +선 +오 +久 +œ +义 +अ +╔ +无 + +은 +ʷ +那 +線 +务 +基 +属 +配 +미 +軍 +โ +津 +完 +研 +注 +失 +应 +က +╚ +友 +章 +Ψ +求 +ण +경 +‬ +भ +们 +模 +需 +ச +電 +প +դ +へ +此 +夜 +或 +橋 +根 +Ī +玉 +ู +ṅ +交 +品 +良 +ང +ォ +则 +開 +Ζ +문 +被 +조 +株 +记 +會 +经 +ू +ょ +转 +崎 +마 +⌘ +比 +造 +ܐ +ื +没 +现 +七 +Ά +商 +ை +机 +阳 +ĉ +角 +站 +բ +해 +及 +ध +術 +认 +‘ +创 +編 +ղ +ḩ +伝 +岡 +ड +ホ +港 +任 +登 +ི +็ +布 +究 +帝 +여 +산 +န +◦ +密 +变 +序 +♀ +∣ +计 +曲 +Ă +ύ +ʋ +传 +】 +包 +意 +去 +沙 +⸮ +【 +写 +超 +ய +今 +┈ +森 +ි +⊗ +비 +հ +Ḩ +ǫ +黄 +∙ +드 +🌍 +景 +湖 +ք +ိ +ⁿ +̂ +ペ +何 +宇 +張 +语 +老 +例 +Ṭ +鉄 +克 +☉ +™ +ɹ +ἱ +ⴰ +然 +를 +ǧ +報 +服 +Ď +想 +‖ +ユ +実 +载 +요 +ℚ +波 +马 +状 +线 +유 +洋 +万 +진 +জ +添 +球 +機 +支 +显 +拉 +ὑ +送 +隊 +ธ +处 
+師 +⊂ +像 +় +黒 +ց + +ủ +只 +起 +段 +တ +區 +選 +천 +業 +算 +广 +រ +视 +秋 +因 +년 +ے +输 +̱ +Մ +∆ +康 +세 +思 +死 +聖 +민 +- +头 +ർ +∉ +車 +┃ +▇ +按 +⍵ +夢 +汉 +从 +ী +题 +ˆ +ἡ +展 +省 +ུ +葉 +호 +ਰ +素 +関 +그 +; +න +页 +共 +宿 +态 +ན +技 +乐 +控 +移 +影 +ụ +ゆ +ご +್ +管 +ൾ +╣ +戸 +⇔ +函 +ẓ +尾 +场 +介 + +育 +ර +泉 +ൽ +说 +换 +必 +紀 +མ +ེ +ợ +ൻ +宝 +気 +门 +令 +左 +漢 +若 +屋 +局 +打 +発 +问 +恋 +兵 +別 +ા +Ս +߬ +গ +并 +ख +ή +节 +ʑ +ץ +Ḫ +ℂ +引 +统 +智 +̩ +ै +电 +현 +✅ +赤 +断 +ね +称 +শ +身 +首 +付 +⅓ +ਸ +連 +ზ +官 +持 +奈 +御 +親 +군 +库 +秀 +址 +守 +活 +ལ +ふ +藏 +ស +竹 +草 +結 +ා +昌 +樹 +ள +무 +হ +ゼ +̈ +շ +勝 +足 +ရ +위 +į +Ἰ +航 +陳 +业 +富 +雪 +आ +再 +안 +默 +박 +용 +✿ +楽 +沢 +羅 +Ė +ʎ +忠 +错 +단 +면 +ķ +桥 +雲 +该 +ṯ +岩 +남 +ỹ +专 +切 +店 +朱 +ף +ず +幸 +母 +ɫ +々 +∷ +串 +击 +Ἐ +設 +⊤ +ₗ +經 +강 +ပ +। +ѐ +ᾶ +➖ +座 +씨 +ぶ +Ţ +云 +告 +変 +试 +隆 +개 +պ +判 +劉 +˜ +ˠ +编 +ณ +ữ +达 +Ě +ܝ +ြ +ḷ +右 +들 +ŝ +ӏ +్ +എ +ற +复 +看 +話 +坂 +尔 +衛 +զ +차 +丸 +样 +鬼 +़ +학 +喜 +斯 +銀 +만 +Ξ +ც +群 +近 +塔 +ϊ +ந +む +确 +索 +∇ +非 +望 +❯ +希 +ỳ +甲 +越 +鳥 +麻 +雅 +拳 +ក +溪 +测 +话 +池 +菜 +食 +터 +ਿ +渡 +速 +ھ +ರ +陈 +健 +ো +ක +ὺ +军 +庄 +红 +Ħ +論 +Ÿ +Έ +ự +孝 +頭 +飛 +˚ +▓ +ً +‭ +么 +達 +ѫ +巴 +洞 +貴 +项 +ദ +ɵ +̍ +ҡ +种 +运 +식 +ྱ +ḳ +彦 +⥤ +书 +构 +米 +连 +操 +装 +과 +ぐ +反 +̌ +仮 +员 +昭 +ശ +兴 +客 +删 +ම +ව +პ +ċ +ഷ +သ +ᵉ +居 +타 +𝓝 +थ +現 +ˇ +종 +助 +唐 +瀬 +ន +微 +1 +Ġ +ほ +舞 +내 +중 +Ē +导 +效 +방 +ḏ +深 +梅 +料 +월 +每 +洲 +회 +茶 +败 +ഞ +ể +ヨ +些 +双 +嘉 +모 +바 +ษ +進 +음 +ญ +丁 +故 +計 +遠 +교 +재 +候 +房 +명 +两 +ფ +才 +합 +止 +番 +ɯ +奇 +怪 +联 +역 +泰 +백 +ὀ +げ +べ +边 +还 +黃 +왕 +收 +弘 +给 diff --git a/examples/SplitLlama/.gitignore b/examples/SplitLlama/.gitignore new file mode 100644 index 00000000..1454b520 --- /dev/null +++ b/examples/SplitLlama/.gitignore @@ -0,0 +1,6 @@ +# model params file +*.data +vocab.txt + +# model mlir file +*.mlir \ No newline at end of file diff --git a/examples/SplitLlama/BaseDisModel.h b/examples/SplitLlama/BaseDisModel.h new file mode 100644 index 00000000..59304b66 --- /dev/null +++ b/examples/SplitLlama/BaseDisModel.h @@ -0,0 +1,70 @@ +#ifndef BASEDISMODEL_H // 作用:防止BaseDisModel.h被重复引用 +#define BASEDISMODEL_H +#include +#include +#include +#include +#include +#include +#include +#include 
+#include +#include +#include +#include + +using namespace buddy; +class BaseDisModel { +public: + /// Print [Log] label in bold blue format. + static void printLogLabel() { std::cout << "\033[34;1m[Log] \033[0m"; } + + /// Load parameters into data container. + static void loadParameters(const std::string ¶mFilePath, + MemRef ¶ms) { + const auto loadStart = std::chrono::high_resolution_clock::now(); + std::ifstream paramFile(paramFilePath, std::ios::in | std::ios::binary); + if (!paramFile.is_open()) { + std::cout << paramFilePath << std::endl; + throw std::runtime_error("[Error] Failed to open params file!"); + } + printLogLabel(); + std::cout << "Loading params..." << std::endl; + printLogLabel(); + std::cout << "Params file: " << std::filesystem::canonical(paramFilePath) + << std::endl; + paramFile.read(reinterpret_cast(params.getData()), + sizeof(float) * (params.getSize())); + if (paramFile.fail()) { + throw std::runtime_error("Error occurred while reading params file!"); + } + paramFile.close(); + const auto loadEnd = std::chrono::high_resolution_clock::now(); + const std::chrono::duration loadTime = + loadEnd - loadStart; + printLogLabel(); + std::cout << "Params load time: " << (double)(loadTime.count()) / 1000 + << "s\n" + << std::endl; + } + + static void getParameters(const size_t *paramSize_group, size_t group_len, int size, + const std::string &splitNum, + std::vector> ¶msContainers) { + + std::string llamaBuildDir = LLAMA_EXAMPLE_BUILD_PATH; + + for (size_t i = 0; i < group_len; i++) { + if (paramSize_group[i] == size) { + std::string paramsDir = llamaBuildDir + "/subgraph" + + std::to_string(i) + "_arg" + splitNum + ".data"; + MemRef paramsContainer({paramSize_group[i]}); + + BaseDisModel::loadParameters(paramsDir, paramsContainer); + paramsContainers.push_back(std::move(paramsContainer)); + } + } + } +}; + +#endif // BASEDISMODEL_H diff --git a/examples/SplitLlama/CMakeLists.txt b/examples/SplitLlama/CMakeLists.txt new file mode 100644 index 
00000000..9ec9ef61 --- /dev/null +++ b/examples/SplitLlama/CMakeLists.txt @@ -0,0 +1,850 @@ +add_custom_command( + OUTPUT ${CMAKE_CURRENT_BINARY_DIR}/forward0.mlir + ${CMAKE_CURRENT_BINARY_DIR}/subgraph0.mlir + ${CMAKE_CURRENT_BINARY_DIR}/forward1.mlir + ${CMAKE_CURRENT_BINARY_DIR}/subgraph1.mlir + ${CMAKE_CURRENT_BINARY_DIR}/forward2.mlir + ${CMAKE_CURRENT_BINARY_DIR}/subgraph2.mlir + ${CMAKE_CURRENT_BINARY_DIR}/forward3.mlir + ${CMAKE_CURRENT_BINARY_DIR}/subgraph3.mlir + ${CMAKE_CURRENT_BINARY_DIR}/forward5.mlir + ${CMAKE_CURRENT_BINARY_DIR}/subgraph5.mlir + ${CMAKE_CURRENT_BINARY_DIR}/forward193.mlir + ${CMAKE_CURRENT_BINARY_DIR}/subgraph193.mlir + COMMAND ${Python3_EXECUTABLE} ${CMAKE_CURRENT_SOURCE_DIR}/llama-import.py + --output-dir ${CMAKE_CURRENT_BINARY_DIR} + COMMENT "Generating forward.mlir, subgraph.mlir and arg0.data..." +) + +add_custom_command( + OUTPUT forward0.o + COMMAND ${LLVM_TOOLS_BINARY_DIR}/mlir-opt ${CMAKE_CURRENT_BINARY_DIR}/forward0.mlir + -pass-pipeline "builtin.module(func.func(tosa-to-linalg-named),func.func(tosa-to-linalg),func.func(tosa-to-tensor),func.func(tosa-to-arith))" | + ${BUDDY_BINARY_DIR}/buddy-opt + -arith-expand + -eliminate-empty-tensors + -empty-tensor-to-alloc-tensor + -one-shot-bufferize + -matmul-parallel-vectorization-optimize + -batchmatmul-optimize + -convert-linalg-to-affine-loops + -affine-loop-fusion + -affine-parallelize + -lower-affine + -convert-scf-to-openmp + -func-bufferize + -arith-bufferize + -tensor-bufferize + -buffer-deallocation + -finalizing-bufferize + -convert-vector-to-scf + -expand-strided-metadata + -convert-vector-to-llvm + -memref-expand + -arith-expand + -convert-arith-to-llvm + -finalize-memref-to-llvm + -convert-scf-to-cf + -llvm-request-c-wrappers + -convert-openmp-to-llvm + -convert-arith-to-llvm + -convert-math-to-llvm + -convert-math-to-libm + -convert-func-to-llvm + -reconcile-unrealized-casts | + ${LLVM_TOOLS_BINARY_DIR}/mlir-translate -mlir-to-llvmir | + 
${LLVM_TOOLS_BINARY_DIR}/llvm-as | + ${LLVM_TOOLS_BINARY_DIR}/llc -filetype=obj -relocation-model=pic -O3 + -o ${CMAKE_CURRENT_BINARY_DIR}/forward0.o + DEPENDS buddy-opt ${CMAKE_CURRENT_BINARY_DIR}/forward0.mlir + COMMENT "Building forward0.o " + VERBATIM) + +add_custom_command( + OUTPUT subgraph0.o + COMMAND ${LLVM_TOOLS_BINARY_DIR}/mlir-opt ${CMAKE_CURRENT_BINARY_DIR}/subgraph0.mlir + -pass-pipeline "builtin.module(func.func(tosa-to-linalg-named),func.func(tosa-to-linalg),func.func(tosa-to-tensor),func.func(tosa-to-arith))" | + ${BUDDY_BINARY_DIR}/buddy-opt + -convert-elementwise-to-linalg + -arith-expand + -eliminate-empty-tensors + -empty-tensor-to-alloc-tensor + -one-shot-bufferize + -func-bufferize-dynamic-offset + -tensor-bufferize + -arith-bufferize + -buffer-deallocation + -finalizing-bufferize + -matmul-parallel-vectorization-optimize + -batchmatmul-optimize + -convert-linalg-to-affine-loops + -affine-loop-fusion + -affine-parallelize + -lower-affine + -convert-scf-to-openmp + -convert-vector-to-scf + -expand-strided-metadata + -cse + -convert-vector-to-llvm + -memref-expand + -arith-expand + -convert-arith-to-llvm + -finalize-memref-to-llvm + -convert-scf-to-cf + -llvm-request-c-wrappers + -convert-openmp-to-llvm + -convert-arith-to-llvm + -convert-math-to-llvm + -convert-math-to-libm + -convert-func-to-llvm + -reconcile-unrealized-casts | + ${LLVM_TOOLS_BINARY_DIR}/mlir-translate -mlir-to-llvmir | + ${LLVM_TOOLS_BINARY_DIR}/llvm-as | + ${LLVM_TOOLS_BINARY_DIR}/llc -filetype=obj -relocation-model=pic -O3 + -o ${CMAKE_CURRENT_BINARY_DIR}/subgraph0.o + DEPENDS buddy-opt ${CMAKE_CURRENT_BINARY_DIR}/subgraph0.mlir + COMMENT "Building subgraph0.o " + VERBATIM) + +add_library(SPLITLLAMA0 STATIC forward0.o subgraph0.o) + +add_custom_command( + OUTPUT forward1.o + COMMAND ${LLVM_TOOLS_BINARY_DIR}/mlir-opt ${CMAKE_CURRENT_BINARY_DIR}/forward1.mlir + -pass-pipeline 
"builtin.module(func.func(tosa-to-linalg-named),func.func(tosa-to-linalg),func.func(tosa-to-tensor),func.func(tosa-to-arith))" | + ${BUDDY_BINARY_DIR}/buddy-opt + -arith-expand + -eliminate-empty-tensors + -empty-tensor-to-alloc-tensor + -one-shot-bufferize + -matmul-parallel-vectorization-optimize + -batchmatmul-optimize + -convert-linalg-to-affine-loops + -affine-loop-fusion + -affine-parallelize + -lower-affine + -convert-scf-to-openmp + -func-bufferize + -arith-bufferize + -tensor-bufferize + -buffer-deallocation + -finalizing-bufferize + -convert-vector-to-scf + -expand-strided-metadata + -convert-vector-to-llvm + -memref-expand + -arith-expand + -convert-arith-to-llvm + -finalize-memref-to-llvm + -convert-scf-to-cf + -llvm-request-c-wrappers + -convert-openmp-to-llvm + -convert-arith-to-llvm + -convert-math-to-llvm + -convert-math-to-libm + -convert-func-to-llvm + -reconcile-unrealized-casts | + ${LLVM_TOOLS_BINARY_DIR}/mlir-translate -mlir-to-llvmir | + ${LLVM_TOOLS_BINARY_DIR}/llvm-as | + ${LLVM_TOOLS_BINARY_DIR}/llc -filetype=obj -relocation-model=pic -O3 + -o ${CMAKE_CURRENT_BINARY_DIR}/forward1.o + DEPENDS buddy-opt ${CMAKE_CURRENT_BINARY_DIR}/forward1.mlir + COMMENT "Building forward1.o " + VERBATIM) + +add_custom_command( + OUTPUT subgraph1.o + COMMAND ${LLVM_TOOLS_BINARY_DIR}/mlir-opt ${CMAKE_CURRENT_BINARY_DIR}/subgraph1.mlir + -pass-pipeline "builtin.module(func.func(tosa-to-linalg-named),func.func(tosa-to-linalg),func.func(tosa-to-tensor),func.func(tosa-to-arith))" | + ${BUDDY_BINARY_DIR}/buddy-opt + -convert-elementwise-to-linalg + -arith-expand + -eliminate-empty-tensors + -empty-tensor-to-alloc-tensor + -one-shot-bufferize + -func-bufferize-dynamic-offset + -tensor-bufferize + -arith-bufferize + -buffer-deallocation + -finalizing-bufferize + -matmul-parallel-vectorization-optimize + -batchmatmul-optimize + -convert-linalg-to-affine-loops + -affine-loop-fusion + -affine-parallelize + -lower-affine + -convert-scf-to-openmp + -convert-vector-to-scf 
+ -expand-strided-metadata + -cse + -convert-vector-to-llvm + -memref-expand + -arith-expand + -convert-arith-to-llvm + -finalize-memref-to-llvm + -convert-scf-to-cf + -llvm-request-c-wrappers + -convert-openmp-to-llvm + -convert-arith-to-llvm + -convert-math-to-llvm + -convert-math-to-libm + -convert-func-to-llvm + -reconcile-unrealized-casts | + ${LLVM_TOOLS_BINARY_DIR}/mlir-translate -mlir-to-llvmir | + ${LLVM_TOOLS_BINARY_DIR}/llvm-as | + ${LLVM_TOOLS_BINARY_DIR}/llc -filetype=obj -relocation-model=pic -O3 + -o ${CMAKE_CURRENT_BINARY_DIR}/subgraph1.o + DEPENDS buddy-opt ${CMAKE_CURRENT_BINARY_DIR}/subgraph1.mlir + COMMENT "Building subgraph1.o " + VERBATIM) + +add_library(SPLITLLAMA1 STATIC forward1.o subgraph1.o) + +add_custom_command( + OUTPUT forward2.o + COMMAND ${LLVM_TOOLS_BINARY_DIR}/mlir-opt ${CMAKE_CURRENT_BINARY_DIR}/forward2.mlir + -pass-pipeline "builtin.module(func.func(tosa-to-linalg-named),func.func(tosa-to-linalg),func.func(tosa-to-tensor),func.func(tosa-to-arith))" | + ${BUDDY_BINARY_DIR}/buddy-opt + -arith-expand + -eliminate-empty-tensors + -empty-tensor-to-alloc-tensor + -one-shot-bufferize + -matmul-parallel-vectorization-optimize + -batchmatmul-optimize + -convert-linalg-to-affine-loops + -affine-loop-fusion + -affine-parallelize + -lower-affine + -convert-scf-to-openmp + -func-bufferize + -arith-bufferize + -tensor-bufferize + -buffer-deallocation + -finalizing-bufferize + -convert-vector-to-scf + -expand-strided-metadata + -convert-vector-to-llvm + -memref-expand + -arith-expand + -convert-arith-to-llvm + -finalize-memref-to-llvm + -convert-scf-to-cf + -llvm-request-c-wrappers + -convert-openmp-to-llvm + -convert-arith-to-llvm + -convert-math-to-llvm + -convert-math-to-libm + -convert-func-to-llvm + -reconcile-unrealized-casts | + ${LLVM_TOOLS_BINARY_DIR}/mlir-translate -mlir-to-llvmir | + ${LLVM_TOOLS_BINARY_DIR}/llvm-as | + ${LLVM_TOOLS_BINARY_DIR}/llc -filetype=obj -relocation-model=pic -O3 + -o ${CMAKE_CURRENT_BINARY_DIR}/forward2.o 
+ DEPENDS buddy-opt ${CMAKE_CURRENT_BINARY_DIR}/forward2.mlir + COMMENT "Building forward2.o " + VERBATIM) + +add_custom_command( + OUTPUT subgraph2.o + COMMAND ${LLVM_TOOLS_BINARY_DIR}/mlir-opt ${CMAKE_CURRENT_BINARY_DIR}/subgraph2.mlir + -pass-pipeline "builtin.module(func.func(tosa-to-linalg-named),func.func(tosa-to-linalg),func.func(tosa-to-tensor),func.func(tosa-to-arith))" | + ${BUDDY_BINARY_DIR}/buddy-opt + -convert-elementwise-to-linalg + -arith-expand + -eliminate-empty-tensors + -empty-tensor-to-alloc-tensor + -one-shot-bufferize + -func-bufferize-dynamic-offset + -tensor-bufferize + -arith-bufferize + -buffer-deallocation + -finalizing-bufferize + -matmul-parallel-vectorization-optimize + -batchmatmul-optimize + -convert-linalg-to-affine-loops + -affine-loop-fusion + -affine-parallelize + -lower-affine + -convert-scf-to-openmp + -convert-vector-to-scf + -expand-strided-metadata + -cse + -convert-vector-to-llvm + -memref-expand + -arith-expand + -convert-arith-to-llvm + -finalize-memref-to-llvm + -convert-scf-to-cf + -llvm-request-c-wrappers + -convert-openmp-to-llvm + -convert-arith-to-llvm + -convert-math-to-llvm + -convert-math-to-libm + -convert-func-to-llvm + -reconcile-unrealized-casts | + ${LLVM_TOOLS_BINARY_DIR}/mlir-translate -mlir-to-llvmir | + ${LLVM_TOOLS_BINARY_DIR}/llvm-as | + ${LLVM_TOOLS_BINARY_DIR}/llc -filetype=obj -relocation-model=pic -O3 + -o ${CMAKE_CURRENT_BINARY_DIR}/subgraph2.o + DEPENDS buddy-opt ${CMAKE_CURRENT_BINARY_DIR}/subgraph2.mlir + COMMENT "Building subgraph2.o " + VERBATIM) + +add_library(SPLITLLAMA2 STATIC forward2.o subgraph2.o) + +add_custom_command( + OUTPUT forward3.o + COMMAND ${LLVM_TOOLS_BINARY_DIR}/mlir-opt ${CMAKE_CURRENT_BINARY_DIR}/forward3.mlir + -pass-pipeline "builtin.module(func.func(tosa-to-linalg-named),func.func(tosa-to-linalg),func.func(tosa-to-tensor),func.func(tosa-to-arith))" | + ${BUDDY_BINARY_DIR}/buddy-opt + -arith-expand + -eliminate-empty-tensors + -empty-tensor-to-alloc-tensor + 
-one-shot-bufferize + -matmul-parallel-vectorization-optimize + -batchmatmul-optimize + -convert-linalg-to-affine-loops + -affine-loop-fusion + -affine-parallelize + -lower-affine + -convert-scf-to-openmp + -func-bufferize + -arith-bufferize + -tensor-bufferize + -buffer-deallocation + -finalizing-bufferize + -convert-vector-to-scf + -expand-strided-metadata + -convert-vector-to-llvm + -memref-expand + -arith-expand + -convert-arith-to-llvm + -finalize-memref-to-llvm + -convert-scf-to-cf + -llvm-request-c-wrappers + -convert-openmp-to-llvm + -convert-arith-to-llvm + -convert-math-to-llvm + -convert-math-to-libm + -convert-func-to-llvm + -reconcile-unrealized-casts | + ${LLVM_TOOLS_BINARY_DIR}/mlir-translate -mlir-to-llvmir | + ${LLVM_TOOLS_BINARY_DIR}/llvm-as | + ${LLVM_TOOLS_BINARY_DIR}/llc -filetype=obj -relocation-model=pic -O3 + -o ${CMAKE_CURRENT_BINARY_DIR}/forward3.o + DEPENDS buddy-opt ${CMAKE_CURRENT_BINARY_DIR}/forward3.mlir + COMMENT "Building forward3.o " + VERBATIM) + +add_custom_command( + OUTPUT subgraph3.o + COMMAND ${LLVM_TOOLS_BINARY_DIR}/mlir-opt ${CMAKE_CURRENT_BINARY_DIR}/subgraph3.mlir + -pass-pipeline "builtin.module(func.func(tosa-to-linalg-named),func.func(tosa-to-linalg),func.func(tosa-to-tensor),func.func(tosa-to-arith))" | + ${BUDDY_BINARY_DIR}/buddy-opt + -convert-elementwise-to-linalg + -arith-expand + -eliminate-empty-tensors + -empty-tensor-to-alloc-tensor + -one-shot-bufferize + -func-bufferize-dynamic-offset + -tensor-bufferize + -arith-bufferize + -buffer-deallocation + -finalizing-bufferize + -matmul-parallel-vectorization-optimize + -batchmatmul-optimize + -convert-linalg-to-affine-loops + -affine-loop-fusion + -affine-parallelize + -lower-affine + -convert-scf-to-openmp + -convert-vector-to-scf + -expand-strided-metadata + -cse + -convert-vector-to-llvm + -memref-expand + -arith-expand + -convert-arith-to-llvm + -finalize-memref-to-llvm + -convert-scf-to-cf + -llvm-request-c-wrappers + -convert-openmp-to-llvm + 
-convert-arith-to-llvm + -convert-math-to-llvm + -convert-math-to-libm + -convert-func-to-llvm + -reconcile-unrealized-casts | + ${LLVM_TOOLS_BINARY_DIR}/mlir-translate -mlir-to-llvmir | + ${LLVM_TOOLS_BINARY_DIR}/llvm-as | + ${LLVM_TOOLS_BINARY_DIR}/llc -filetype=obj -relocation-model=pic -O3 + -o ${CMAKE_CURRENT_BINARY_DIR}/subgraph3.o + DEPENDS buddy-opt ${CMAKE_CURRENT_BINARY_DIR}/subgraph3.mlir + COMMENT "Building subgraph3.o " + VERBATIM) + +add_library(SPLITLLAMA3 STATIC forward3.o subgraph3.o) + +add_custom_command( + OUTPUT forward5.o + COMMAND ${LLVM_TOOLS_BINARY_DIR}/mlir-opt ${CMAKE_CURRENT_BINARY_DIR}/forward5.mlir + -pass-pipeline "builtin.module(func.func(tosa-to-linalg-named),func.func(tosa-to-linalg),func.func(tosa-to-tensor),func.func(tosa-to-arith))" | + ${BUDDY_BINARY_DIR}/buddy-opt + -arith-expand + -eliminate-empty-tensors + -empty-tensor-to-alloc-tensor + -one-shot-bufferize + -matmul-parallel-vectorization-optimize + -batchmatmul-optimize + -convert-linalg-to-affine-loops + -affine-loop-fusion + -affine-parallelize + -lower-affine + -convert-scf-to-openmp + -func-bufferize + -arith-bufferize + -tensor-bufferize + -buffer-deallocation + -finalizing-bufferize + -convert-vector-to-scf + -expand-strided-metadata + -convert-vector-to-llvm + -memref-expand + -arith-expand + -convert-arith-to-llvm + -finalize-memref-to-llvm + -convert-scf-to-cf + -llvm-request-c-wrappers + -convert-openmp-to-llvm + -convert-arith-to-llvm + -convert-math-to-llvm + -convert-math-to-libm + -convert-func-to-llvm + -reconcile-unrealized-casts | + ${LLVM_TOOLS_BINARY_DIR}/mlir-translate -mlir-to-llvmir | + ${LLVM_TOOLS_BINARY_DIR}/llvm-as | + ${LLVM_TOOLS_BINARY_DIR}/llc -filetype=obj -relocation-model=pic -O3 + -o ${CMAKE_CURRENT_BINARY_DIR}/forward5.o + DEPENDS buddy-opt ${CMAKE_CURRENT_BINARY_DIR}/forward5.mlir + COMMENT "Building forward5.o " + VERBATIM) + +add_custom_command( + OUTPUT subgraph5.o + COMMAND ${LLVM_TOOLS_BINARY_DIR}/mlir-opt 
${CMAKE_CURRENT_BINARY_DIR}/subgraph5.mlir + -pass-pipeline "builtin.module(func.func(tosa-to-linalg-named),func.func(tosa-to-linalg),func.func(tosa-to-tensor),func.func(tosa-to-arith))" | + ${BUDDY_BINARY_DIR}/buddy-opt + -convert-elementwise-to-linalg + -arith-expand + -eliminate-empty-tensors + -empty-tensor-to-alloc-tensor + -one-shot-bufferize + -func-bufferize-dynamic-offset + -tensor-bufferize + -arith-bufferize + -buffer-deallocation + -finalizing-bufferize + -matmul-parallel-vectorization-optimize + -batchmatmul-optimize + -convert-linalg-to-affine-loops + -affine-loop-fusion + -affine-parallelize + -lower-affine + -convert-scf-to-openmp + -convert-vector-to-scf + -expand-strided-metadata + -cse + -convert-vector-to-llvm + -memref-expand + -arith-expand + -convert-arith-to-llvm + -finalize-memref-to-llvm + -convert-scf-to-cf + -llvm-request-c-wrappers + -convert-openmp-to-llvm + -convert-arith-to-llvm + -convert-math-to-llvm + -convert-math-to-libm + -convert-func-to-llvm + -reconcile-unrealized-casts | + ${LLVM_TOOLS_BINARY_DIR}/mlir-translate -mlir-to-llvmir | + ${LLVM_TOOLS_BINARY_DIR}/llvm-as | + ${LLVM_TOOLS_BINARY_DIR}/llc -filetype=obj -relocation-model=pic -O3 + -o ${CMAKE_CURRENT_BINARY_DIR}/subgraph5.o + DEPENDS buddy-opt ${CMAKE_CURRENT_BINARY_DIR}/subgraph5.mlir + COMMENT "Building subgraph5.o " + VERBATIM) + +add_library(SPLITLLAMA4 STATIC forward5.o subgraph5.o) + +add_custom_command( + OUTPUT forward193.o + COMMAND ${LLVM_TOOLS_BINARY_DIR}/mlir-opt ${CMAKE_CURRENT_BINARY_DIR}/forward193.mlir + -pass-pipeline "builtin.module(func.func(tosa-to-linalg-named),func.func(tosa-to-linalg),func.func(tosa-to-tensor),func.func(tosa-to-arith))" | + ${BUDDY_BINARY_DIR}/buddy-opt + -arith-expand + -eliminate-empty-tensors + -empty-tensor-to-alloc-tensor + -one-shot-bufferize + -matmul-parallel-vectorization-optimize + -batchmatmul-optimize + -convert-linalg-to-affine-loops + -affine-loop-fusion + -affine-parallelize + -lower-affine + 
-convert-scf-to-openmp + -func-bufferize + -arith-bufferize + -tensor-bufferize + -buffer-deallocation + -finalizing-bufferize + -convert-vector-to-scf + -expand-strided-metadata + -convert-vector-to-llvm + -memref-expand + -arith-expand + -convert-arith-to-llvm + -finalize-memref-to-llvm + -convert-scf-to-cf + -llvm-request-c-wrappers + -convert-openmp-to-llvm + -convert-arith-to-llvm + -convert-math-to-llvm + -convert-math-to-libm + -convert-func-to-llvm + -reconcile-unrealized-casts | + ${LLVM_TOOLS_BINARY_DIR}/mlir-translate -mlir-to-llvmir | + ${LLVM_TOOLS_BINARY_DIR}/llvm-as | + ${LLVM_TOOLS_BINARY_DIR}/llc -filetype=obj -relocation-model=pic -O3 + -o ${CMAKE_CURRENT_BINARY_DIR}/forward193.o + DEPENDS buddy-opt ${CMAKE_CURRENT_BINARY_DIR}/forward193.mlir + COMMENT "Building forward193.o " + VERBATIM) + +add_custom_command( + OUTPUT subgraph193.o + COMMAND ${LLVM_TOOLS_BINARY_DIR}/mlir-opt ${CMAKE_CURRENT_BINARY_DIR}/subgraph193.mlir + -pass-pipeline "builtin.module(func.func(tosa-to-linalg-named),func.func(tosa-to-linalg),func.func(tosa-to-tensor),func.func(tosa-to-arith))" | + ${BUDDY_BINARY_DIR}/buddy-opt + -convert-elementwise-to-linalg + -arith-expand + -eliminate-empty-tensors + -empty-tensor-to-alloc-tensor + -one-shot-bufferize + -func-bufferize-dynamic-offset + -tensor-bufferize + -arith-bufferize + -buffer-deallocation + -finalizing-bufferize + -matmul-parallel-vectorization-optimize + -batchmatmul-optimize + -convert-linalg-to-affine-loops + -affine-loop-fusion + -affine-parallelize + -lower-affine + -convert-scf-to-openmp + -convert-vector-to-scf + -expand-strided-metadata + -cse + -convert-vector-to-llvm + -memref-expand + -arith-expand + -convert-arith-to-llvm + -finalize-memref-to-llvm + -convert-scf-to-cf + -llvm-request-c-wrappers + -convert-openmp-to-llvm + -convert-arith-to-llvm + -convert-math-to-llvm + -convert-math-to-libm + -convert-func-to-llvm + -reconcile-unrealized-casts | + ${LLVM_TOOLS_BINARY_DIR}/mlir-translate -mlir-to-llvmir | + 
${LLVM_TOOLS_BINARY_DIR}/llvm-as | + ${LLVM_TOOLS_BINARY_DIR}/llc -filetype=obj -relocation-model=pic -O3 + -o ${CMAKE_CURRENT_BINARY_DIR}/subgraph193.o + DEPENDS buddy-opt ${CMAKE_CURRENT_BINARY_DIR}/subgraph193.mlir + COMMENT "Building subgraph193.o " + VERBATIM) + +set(Boost_INCLUDE_DIR "/home/chenweiwei/boost_1_86_0/include") +add_library(SPLITLLAMA5 STATIC forward193.o subgraph193.o) + +# Find the Boost libraries. NOTE(review): Boost_INCLUDE_DIR above is a machine-specific absolute path; confirm it should be configurable. +find_package(Boost REQUIRED COMPONENTS system thread) + +# Add the Boost header directories + +include_directories(${Boost_INCLUDE_DIRS}) + +# Add the websocketpp header path. NOTE(review): machine-specific absolute path; confirm it should be configurable. +include_directories(/home/chenweiwei/llvm/websocketpp) + +SET_SOURCE_FILES_PROPERTIES( + template.o + PROPERTIES + EXTERNAL_OBJECT true + GENERATED true) + +SET_TARGET_PROPERTIES( + SPLITLLAMA0 + SPLITLLAMA1 + SPLITLLAMA2 + SPLITLLAMA3 + SPLITLLAMA4 + SPLITLLAMA5 + PROPERTIES + LINKER_LANGUAGE C) + +set(LLAMA_SPLIT_EXAMPLE_PATH ${CMAKE_CURRENT_SOURCE_DIR}) +set(LLAMA_EXAMPLE_BUILD_PATH ${CMAKE_CURRENT_BINARY_DIR}) + +set(BUDDY_SPLIT_LLAMA_LIBS + SPLITLLAMA0 + SPLITLLAMA1 + SPLITLLAMA2 + SPLITLLAMA3 + SPLITLLAMA4 + SPLITLLAMA5 + mlir_c_runner_utils + omp +) + +add_executable(buddy-llama-input-run llama-input.cpp) + +target_compile_definitions(buddy-llama-input-run PRIVATE + LLAMA_SPLIT_EXAMPLE_PATH="${LLAMA_SPLIT_EXAMPLE_PATH}" + LLAMA_EXAMPLE_BUILD_PATH="${LLAMA_EXAMPLE_BUILD_PATH}" +) + +target_link_directories(buddy-llama-input-run PRIVATE ${LLVM_LIBRARY_DIR}) + + +target_link_libraries(buddy-llama-input-run + PRIVATE + ${BUDDY_SPLIT_LLAMA_LIBS} + ${Boost_LIBRARIES} + pthread + ${CMAKE_THREAD_LIBS_INIT} # Link the thread library +) + +add_executable(buddy-llama-rms-run llama-rms-mha.cpp) + +target_compile_definitions(buddy-llama-rms-run PRIVATE + LLAMA_SPLIT_EXAMPLE_PATH="${LLAMA_SPLIT_EXAMPLE_PATH}" + LLAMA_EXAMPLE_BUILD_PATH="${LLAMA_EXAMPLE_BUILD_PATH}" +) + +target_link_directories(buddy-llama-rms-run PRIVATE ${LLVM_LIBRARY_DIR}) + +target_link_libraries(buddy-llama-rms-run + PRIVATE + ${BUDDY_SPLIT_LLAMA_LIBS} + ${Boost_LIBRARIES} +
pthread + ${CMAKE_THREAD_LIBS_INIT} # Link the thread library +) + +add_executable(buddy-llama-rms-0-run llama-rms-mha-0.cpp) + +target_compile_definitions(buddy-llama-rms-0-run PRIVATE + LLAMA_SPLIT_EXAMPLE_PATH="${LLAMA_SPLIT_EXAMPLE_PATH}" + LLAMA_EXAMPLE_BUILD_PATH="${LLAMA_EXAMPLE_BUILD_PATH}" +) + +target_link_directories(buddy-llama-rms-0-run PRIVATE ${LLVM_LIBRARY_DIR}) + +target_link_libraries(buddy-llama-rms-0-run + PRIVATE + ${BUDDY_SPLIT_LLAMA_LIBS} + ${Boost_LIBRARIES} + pthread + ${CMAKE_THREAD_LIBS_INIT} # Link the thread library +) + +add_executable(buddy-llama-mha-run llama-mha.cpp) + +target_compile_definitions(buddy-llama-mha-run PRIVATE + LLAMA_SPLIT_EXAMPLE_PATH="${LLAMA_SPLIT_EXAMPLE_PATH}" + LLAMA_EXAMPLE_BUILD_PATH="${LLAMA_EXAMPLE_BUILD_PATH}" +) + +target_link_directories(buddy-llama-mha-run PRIVATE ${LLVM_LIBRARY_DIR}) + +target_link_libraries(buddy-llama-mha-run + PRIVATE + ${BUDDY_SPLIT_LLAMA_LIBS} + ${Boost_LIBRARIES} + pthread + ${CMAKE_THREAD_LIBS_INIT} # Link the thread library +) + +add_executable(buddy-llama-mha-0-run llama-mha-0.cpp) + +target_compile_definitions(buddy-llama-mha-0-run PRIVATE + LLAMA_SPLIT_EXAMPLE_PATH="${LLAMA_SPLIT_EXAMPLE_PATH}" + LLAMA_EXAMPLE_BUILD_PATH="${LLAMA_EXAMPLE_BUILD_PATH}" +) + +target_link_directories(buddy-llama-mha-0-run PRIVATE ${LLVM_LIBRARY_DIR}) + +target_link_libraries(buddy-llama-mha-0-run + PRIVATE + ${BUDDY_SPLIT_LLAMA_LIBS} + ${Boost_LIBRARIES} + pthread + ${CMAKE_THREAD_LIBS_INIT} # Link the thread library +) + +add_executable(buddy-llama-add-run llama-add-mha.cpp) + +target_compile_definitions(buddy-llama-add-run PRIVATE + LLAMA_SPLIT_EXAMPLE_PATH="${LLAMA_SPLIT_EXAMPLE_PATH}" + LLAMA_EXAMPLE_BUILD_PATH="${LLAMA_EXAMPLE_BUILD_PATH}" +) + +target_link_directories(buddy-llama-add-run PRIVATE ${LLVM_LIBRARY_DIR}) + +target_link_libraries(buddy-llama-add-run + PRIVATE + ${BUDDY_SPLIT_LLAMA_LIBS} + ${Boost_LIBRARIES} + pthread + ${CMAKE_THREAD_LIBS_INIT} # Link the thread library +) + +add_executable(buddy-llama-add-0-run llama-add-mha-0.cpp) +
+target_compile_definitions(buddy-llama-add-0-run PRIVATE + LLAMA_SPLIT_EXAMPLE_PATH="${LLAMA_SPLIT_EXAMPLE_PATH}" + LLAMA_EXAMPLE_BUILD_PATH="${LLAMA_EXAMPLE_BUILD_PATH}" +) + +target_link_directories(buddy-llama-add-0-run PRIVATE ${LLVM_LIBRARY_DIR}) + +target_link_libraries(buddy-llama-add-0-run + PRIVATE + ${BUDDY_SPLIT_LLAMA_LIBS} + ${Boost_LIBRARIES} + pthread + ${CMAKE_THREAD_LIBS_INIT} # Link the thread library +) + +add_executable(buddy-llama-rms-mlp-run llama-rms-mlp.cpp) + +target_compile_definitions(buddy-llama-rms-mlp-run PRIVATE + LLAMA_SPLIT_EXAMPLE_PATH="${LLAMA_SPLIT_EXAMPLE_PATH}" + LLAMA_EXAMPLE_BUILD_PATH="${LLAMA_EXAMPLE_BUILD_PATH}" +) + +target_link_directories(buddy-llama-rms-mlp-run PRIVATE ${LLVM_LIBRARY_DIR}) + +target_link_libraries(buddy-llama-rms-mlp-run + PRIVATE + ${BUDDY_SPLIT_LLAMA_LIBS} + ${Boost_LIBRARIES} + pthread + ${CMAKE_THREAD_LIBS_INIT} # Link the thread library +) + +add_executable(buddy-llama-rms-mlp-0-run llama-rms-mlp-0.cpp) + +target_compile_definitions(buddy-llama-rms-mlp-0-run PRIVATE + LLAMA_SPLIT_EXAMPLE_PATH="${LLAMA_SPLIT_EXAMPLE_PATH}" + LLAMA_EXAMPLE_BUILD_PATH="${LLAMA_EXAMPLE_BUILD_PATH}" +) + +target_link_directories(buddy-llama-rms-mlp-0-run PRIVATE ${LLVM_LIBRARY_DIR}) + +target_link_libraries(buddy-llama-rms-mlp-0-run + PRIVATE + ${BUDDY_SPLIT_LLAMA_LIBS} + ${Boost_LIBRARIES} + pthread + ${CMAKE_THREAD_LIBS_INIT} # Link the thread library +) + +add_executable(buddy-llama-mlp-run llama-mlp.cpp) + +target_compile_definitions(buddy-llama-mlp-run PRIVATE + LLAMA_SPLIT_EXAMPLE_PATH="${LLAMA_SPLIT_EXAMPLE_PATH}" + LLAMA_EXAMPLE_BUILD_PATH="${LLAMA_EXAMPLE_BUILD_PATH}" +) + +target_link_directories(buddy-llama-mlp-run PRIVATE ${LLVM_LIBRARY_DIR}) + +target_link_libraries(buddy-llama-mlp-run + PRIVATE + ${BUDDY_SPLIT_LLAMA_LIBS} + ${Boost_LIBRARIES} + pthread + ${CMAKE_THREAD_LIBS_INIT} # Link the thread library +) + +add_executable(buddy-llama-mlp-0-run llama-mlp-0.cpp) + +target_compile_definitions(buddy-llama-mlp-0-run PRIVATE +
LLAMA_SPLIT_EXAMPLE_PATH="${LLAMA_SPLIT_EXAMPLE_PATH}" + LLAMA_EXAMPLE_BUILD_PATH="${LLAMA_EXAMPLE_BUILD_PATH}" +) + +target_link_directories(buddy-llama-mlp-0-run PRIVATE ${LLVM_LIBRARY_DIR}) + +target_link_libraries(buddy-llama-mlp-0-run + PRIVATE + ${BUDDY_SPLIT_LLAMA_LIBS} + ${Boost_LIBRARIES} + pthread + ${CMAKE_THREAD_LIBS_INIT} # Link the thread library +) + +add_executable(buddy-llama-add-mlp-run llama-add-mlp.cpp) + +target_compile_definitions(buddy-llama-add-mlp-run PRIVATE + LLAMA_SPLIT_EXAMPLE_PATH="${LLAMA_SPLIT_EXAMPLE_PATH}" + LLAMA_EXAMPLE_BUILD_PATH="${LLAMA_EXAMPLE_BUILD_PATH}" +) + +target_link_directories(buddy-llama-add-mlp-run PRIVATE ${LLVM_LIBRARY_DIR}) + +target_link_libraries(buddy-llama-add-mlp-run + PRIVATE + ${BUDDY_SPLIT_LLAMA_LIBS} + ${Boost_LIBRARIES} + pthread + ${CMAKE_THREAD_LIBS_INIT} # Link the thread library +) + +add_executable(buddy-llama-add-mlp-0-run llama-add-mlp-0.cpp) + +target_compile_definitions(buddy-llama-add-mlp-0-run PRIVATE + LLAMA_SPLIT_EXAMPLE_PATH="${LLAMA_SPLIT_EXAMPLE_PATH}" + LLAMA_EXAMPLE_BUILD_PATH="${LLAMA_EXAMPLE_BUILD_PATH}" +) + +target_link_directories(buddy-llama-add-mlp-0-run PRIVATE ${LLVM_LIBRARY_DIR}) + +target_link_libraries(buddy-llama-add-mlp-0-run + PRIVATE + ${BUDDY_SPLIT_LLAMA_LIBS} + ${Boost_LIBRARIES} + pthread + ${CMAKE_THREAD_LIBS_INIT} # Link the thread library +) + +add_executable(buddy-llama-output-run llama-output.cpp) + +target_compile_definitions(buddy-llama-output-run PRIVATE + LLAMA_SPLIT_EXAMPLE_PATH="${LLAMA_SPLIT_EXAMPLE_PATH}" + LLAMA_EXAMPLE_BUILD_PATH="${LLAMA_EXAMPLE_BUILD_PATH}" +) + +target_link_directories(buddy-llama-output-run PRIVATE ${LLVM_LIBRARY_DIR}) + + +target_link_libraries(buddy-llama-output-run + PRIVATE + ${BUDDY_SPLIT_LLAMA_LIBS} + ${Boost_LIBRARIES} + pthread + ${CMAKE_THREAD_LIBS_INIT} # Link the thread library +) diff --git a/examples/SplitLlama/README.md b/examples/SplitLlama/README.md new file mode 100644 index 00000000..889ea475 --- /dev/null +++ b/examples/SplitLlama/README.md @@ -0,0 +1,113 @@ +# Buddy Compiler LLaMA
Example + +1. Download LLaMA2 model + +Download the LLaMA2 model. You can get it from [Meta AI](https://ai.meta.com/llama/). + +2. Enter Python virtual environment + +We recommend using anaconda3 to create a Python virtual environment. Install the Python packages listed in buddy-mlir/requirements.txt. + +``` +$ conda activate +$ cd buddy-mlir +$ pip install -r requirements.txt +``` + +3. Convert the LLaMA2 model to HuggingFace format + +Convert the LLaMA2 model downloaded from Meta AI to HuggingFace format, because we use the HuggingFace API to load the LLaMA2 model. + +``` +$ cd examples/BuddyLlama +$ python llama2-to-hf.py --input_dir path-to-llama2-model --model_size 7B --output_dir path-to-save-llama-hf-model +``` + +For example, if you have a 7B LLaMA2 model, your input_dir (path-to-llama2-model) should contain a tokenizer.model file and a directory named "7B". Put your 7B LLaMA2 model inside the "7B" directory. + +In addition, set an environment variable for the generated LLaMA model. +``` +$ export LLAMA_MODEL_PATH=/path-to-save-llama-hf-model/ +``` + +4. Build and check LLVM/MLIR + +``` +$ cd buddy-mlir +$ mkdir llvm/build +$ cd llvm/build +$ cmake -G Ninja ../llvm \ + -DLLVM_ENABLE_PROJECTS="mlir;clang;openmp" \ + -DLLVM_TARGETS_TO_BUILD="host;RISCV" \ + -DLLVM_ENABLE_ASSERTIONS=ON \ + -DOPENMP_ENABLE_LIBOMPTARGET=OFF \ + -DCMAKE_BUILD_TYPE=RELEASE \ + -DMLIR_ENABLE_BINDINGS_PYTHON=ON \ + -DPython3_EXECUTABLE=$(which python3) +$ ninja check-clang check-mlir omp +``` + +5. Build and check buddy-mlir + +``` +$ cd buddy-mlir +$ mkdir build +$ cd build +$ cmake -G Ninja .. \ + -DMLIR_DIR=$PWD/../llvm/build/lib/cmake/mlir \ + -DLLVM_DIR=$PWD/../llvm/build/lib/cmake/llvm \ + -DLLVM_ENABLE_ASSERTIONS=ON \ + -DCMAKE_BUILD_TYPE=RELEASE \ + -DBUDDY_MLIR_ENABLE_PYTHON_PACKAGES=ON \ + -DPython3_EXECUTABLE=$(which python3) \ + -DBUDDY_DIS_LLAMA_EXAMPLES=ON +$ ninja +$ ninja check-buddy +``` + +Set the `PYTHONPATH` environment variable.
Make sure that the `PYTHONPATH` variable includes the directory of LLVM/MLIR python bindings and the directory of Buddy MLIR python packages. + +``` +$ export PYTHONPATH=/path-to-buddy-mlir/llvm/build/tools/mlir/python_packages/mlir_core:/path-to-buddy-mlir/build/python_packages:${PYTHONPATH} + +// For example: +// Navigate to your buddy-mlir/build directory +$ cd buddy-mlir/build +$ export BUDDY_MLIR_BUILD_DIR=$PWD +$ export LLVM_MLIR_BUILD_DIR=$PWD/../llvm/build +$ export PYTHONPATH=${LLVM_MLIR_BUILD_DIR}/tools/mlir/python_packages/mlir_core:${BUDDY_MLIR_BUILD_DIR}/python_packages:${PYTHONPATH} +``` + +6. Build and run LLaMA example + +``` +$ cmake -G Ninja .. -DBUDDY_SPLIT_LLAMA_EXAMPLES=ON +$ ninja buddy-split-llama-run +$ cd bin +$ ./buddy-dis-llama-run +``` +This build takes a few minutes. We recommend using a powerful CPU, such as a server-class CPU, to run buddy-llama-run. + +If you wish to utilize `mimalloc` as a memory allocator, you need to set `BUDDY_MLIR_USE_MIMALLOC` and `MIMALLOC_BUILD_DIR`. +For more details, please see [here](../../thirdparty/README.md#the-mimalloc-allocator). + +## Testing the segmentation model using a py file + +1. Set the `PYTHONPATH` environment variable. Make sure that the `PYTHONPATH` variable includes the directory of LLVM/MLIR python bindings and the directory of Buddy MLIR python packages. + +``` +$ export PYTHONPATH=/path-to-buddy-mlir/llvm/build/tools/mlir/python_packages/mlir_core:/path-to-buddy-mlir/build/python_packages:${PYTHONPATH} + +// For example: +// Navigate to your buddy-mlir/build directory +$ cd buddy-mlir/build +$ export BUDDY_MLIR_BUILD_DIR=$PWD +$ export LLVM_MLIR_BUILD_DIR=$PWD/../llvm/build +$ export PYTHONPATH=${LLVM_MLIR_BUILD_DIR}/tools/mlir/python_packages/mlir_core:${BUDDY_MLIR_BUILD_DIR}/python_packages:${PYTHONPATH} +``` + +2.
Execute the py file +``` +$ cd examples/SplitLlama +$ python3 llama-import.py --output-dir ./ +``` \ No newline at end of file diff --git a/examples/SplitLlama/llama-add-mha-0.cpp b/examples/SplitLlama/llama-add-mha-0.cpp new file mode 100644 index 00000000..fd2eacd3 --- /dev/null +++ b/examples/SplitLlama/llama-add-mha-0.cpp @@ -0,0 +1,34 @@ +//===- llama-add-mha-0.cpp ------------------------------------------------===// +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +//===----------------------------------------------------------------------===// +#include "llamaAdd.h" +// ----------------------------------------------------------------------------- +// Entry point: run AddMess and Comp on two threads sharing one SharedQueue +// ----------------------------------------------------------------------------- + +int main() { + SharedQueue shared_queue; + AddMess addMess("AddMess1", 0, shared_queue, 9007, "ws://localhost:9003", + "ws://localhost:9004", "ws://localhost:9005", "ws://localhost:9003"); + Comp comp(shared_queue); + + std::thread add_thread([&addMess] { addMess.run(); }); + std::thread comp_thread([&comp] { comp.run(); }); + + add_thread.join(); + comp_thread.join(); + + return 0; +} diff --git a/examples/SplitLlama/llama-add-mha.cpp b/examples/SplitLlama/llama-add-mha.cpp new file mode 100644 index 00000000..c0aafc3b --- /dev/null +++ b/examples/SplitLlama/llama-add-mha.cpp @@ -0,0 +1,34 @@ +//===- llama-add-mha.cpp --------------------------------------------------===// +// +// Licensed
under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +//===----------------------------------------------------------------------===// +#include "llamaAdd.h" +// ----------------------------------------------------------------------------- +// Entry point: run AddMess and Comp on two threads sharing one SharedQueue +// ----------------------------------------------------------------------------- + +int main() { + SharedQueue shared_queue; + AddMess addMess("AddMess0", 0, shared_queue, 9006, "ws://localhost:9002", + "ws://localhost:9004", "ws://localhost:9005", "ws://localhost:9002"); + Comp comp(shared_queue); + + std::thread add_thread([&addMess] { addMess.run(); }); + std::thread comp_thread([&comp] { comp.run(); }); + + add_thread.join(); + comp_thread.join(); + + return 0; +} diff --git a/examples/SplitLlama/llama-add-mlp-0.cpp b/examples/SplitLlama/llama-add-mlp-0.cpp new file mode 100644 index 00000000..3f42eb74 --- /dev/null +++ b/examples/SplitLlama/llama-add-mlp-0.cpp @@ -0,0 +1,35 @@ +//===- llama-add-mlp-0.cpp ------------------------------------------------===// +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +//===----------------------------------------------------------------------===// +#include "llamaAdd.h" +// ----------------------------------------------------------------------------- +// Entry point: run AddMess and Comp on two threads sharing one SharedQueue +// ----------------------------------------------------------------------------- + +int main() { + + SharedQueue shared_queue; + AddMess addMess("AddMess1", 1, shared_queue, 9013, "ws://localhost:9009", + "ws://localhost:9010", "ws://localhost:9011", "ws://localhost:9003"); + Comp comp(shared_queue); + + std::thread add_thread([&addMess] { addMess.run(); }); + std::thread comp_thread([&comp] { comp.run(); }); + + add_thread.join(); + comp_thread.join(); + + return 0; +} diff --git a/examples/SplitLlama/llama-add-mlp.cpp b/examples/SplitLlama/llama-add-mlp.cpp new file mode 100644 index 00000000..3995171b --- /dev/null +++ b/examples/SplitLlama/llama-add-mlp.cpp @@ -0,0 +1,34 @@ +//===- llama-add-mlp.cpp --------------------------------------------------===// +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License.
+// +//===----------------------------------------------------------------------===// +#include "llamaAdd.h" +// ----------------------------------------------------------------------------- +// Entry point: run AddMess and Comp on two threads sharing one SharedQueue +// ----------------------------------------------------------------------------- + +int main() { + SharedQueue shared_queue; + AddMess addMess("AddMess0", 1, shared_queue, 9012, "ws://localhost:9008", + "ws://localhost:9010", "ws://localhost:9011", "ws://localhost:9002"); + Comp comp(shared_queue); + + std::thread add_thread([&addMess] { addMess.run(); }); + std::thread comp_thread([&comp] { comp.run(); }); + + add_thread.join(); + comp_thread.join(); + + return 0; +} diff --git a/examples/SplitLlama/llama-import.py b/examples/SplitLlama/llama-import.py new file mode 100644 index 00000000..04d47241 --- /dev/null +++ b/examples/SplitLlama/llama-import.py @@ -0,0 +1,116 @@ +# ===- llama-import.py -------------------------------------------------------- +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# +# ===--------------------------------------------------------------------------- +# +# This script imports the LLaMA2 model and writes per-subgraph MLIR files and parameters.
+# +# ===--------------------------------------------------------------------------- + +import os +import argparse +import torch +import torch._dynamo as dynamo +from transformers import LlamaForCausalLM, LlamaTokenizer +from torch._inductor.decomposition import decompositions as inductor_decomp +from torchviz import make_dot +import numpy + +from buddy.compiler.frontend import DynamoCompiler +from buddy.compiler.ops import tosa +from buddy.compiler.graph import GraphDriver +from buddy.compiler.graph.transform import simply_fuse, apply_classic_fusion + +class SubModel(torch.nn.Module): + def __init__(self, original_model): + super().__init__() + # Extract the leading modules of the original model + self.embed_tokens = original_model.model.embed_tokens + self.layers = torch.nn.ModuleList([original_model.model.layers[0]]) # Only the first Transformer layer + self.norm = original_model.model.norm # Kept in case the final LayerNorm is needed; NOTE(review): not used by forward() + + def forward(self, input_ids, attention_mask, position_ids): + # Embedding layer + hidden_states = self.embed_tokens(input_ids) + + # First Transformer layer; element [0] of its result is kept as the hidden states + hidden_states = self.layers[0]( + hidden_states, + attention_mask=attention_mask, + position_ids=position_ids, + use_cache=False + )[0] + + return hidden_states + +# Add argument parser to allow custom output directory. +parser = argparse.ArgumentParser(description="LLaMA2 model AOT importer") +parser.add_argument( + "--output-dir", + type=str, + default="./", + help="Directory to save output files." +) +args = parser.parse_args() + +# Ensure the output directory exists. +output_dir = args.output_dir +os.makedirs(output_dir, exist_ok=True) + +# Retrieve the LLaMA model path from environment variables. +model_path = os.environ.get("LLAMA_MODEL_PATH") +if model_path is None: + raise EnvironmentError( + "The environment variable 'LLAMA_MODEL_PATH' is not set or is invalid." + ) + +# Initialize the tokenizer and model from the specified model path.
+# LlamaTokenizer用于对文本进行分词。分词器将文本转换为模型可以处理的输入格式(通常是标记或ID序列) +# from_pretrained用于从预训练模型的路径加载分词器配置和词汇表。 +tokenizer = LlamaTokenizer.from_pretrained(model_path, legacy=True) + +# LlamaForCausalLM用于加载和使用LLaMA模型进行因果语言建模任务。 +# from_pretrained用于从预训练模型的路径加载模型权重和配置。 +model = LlamaForCausalLM.from_pretrained(model_path, torchscript=True) +model.config.use_cache = False + +# Initialize Dynamo Compiler with specific configurations as an importer. +dynamo_compiler = DynamoCompiler( + primary_registry=tosa.ops_registry, + aot_autograd_decomposition=inductor_decomp, +) + +# Import the model into MLIR module and parameters. +with torch.no_grad(): + data = torch.tensor([[1 for i in range(40)]], dtype=torch.int64) + graphs = dynamo_compiler.importer(model, data) + +assert len(graphs) == 1 +graph = graphs[0] +params = dynamo_compiler.imported_params[graph] + +driver = GraphDriver(graphs[0]) +for i in range(len(driver.subgraphs)): + driver.subgraphs[i].lower_to_top_level_ir() + +driver.construct_main_graph(True) +# Save the generated files to the specified output directory. +for i in range(len(driver.subgraphs)): + with open(os.path.join(output_dir, f"subgraph{i}.mlir"), "w") as module_file: + print(driver.subgraphs[i]._imported_module, file=module_file) + with open(os.path.join(output_dir, f"forward{i}.mlir"), "w") as module_file: + print(driver.modules[i], file=module_file) + +for entry in driver._subgraph_param_info.items(): + driver.construct_sub_params(params, entry, output_dir) diff --git a/examples/SplitLlama/llama-input.cpp b/examples/SplitLlama/llama-input.cpp new file mode 100644 index 00000000..0530c648 --- /dev/null +++ b/examples/SplitLlama/llama-input.cpp @@ -0,0 +1,57 @@ +//===- llama-main.cpp -----------------------------------------------------===// +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +//===----------------------------------------------------------------------===// +#include "llamaInput.h" +// ----------------------------------------------------------------------------- +// LLaMA input node entry: runs InputMess (messaging) and Comp (compute) threads +// ----------------------------------------------------------------------------- + +int main() { + /// Print the title of this example. + const std::string title = "LLaMA 2 Inference Powered by Buddy Compiler"; + std::cout << "\033[33;1m" << title << "\033[0m" << std::endl; + + /// Initialize the result containers + // - One MemRef of shape {1, MaxTokenLength, HiddenSize}. + // - One MemRef of shape {MaxTokenLength, HiddenSize1}. + // - Two MemRefs of shape {1, MaxTokenLength, HiddenSize0}. + MemRef myMemRef1({1, MaxTokenLength, HiddenSize}); + MemRef myMemRef2({MaxTokenLength, HiddenSize1}); + MemRef myMemRef3({1, MaxTokenLength, HiddenSize0}); + MemRef myMemRef4({1, MaxTokenLength, HiddenSize0}); + MemRefContainer resultContainer(myMemRef1, myMemRef2, myMemRef3, myMemRef4); + MemRefContainer *resultContainerPtr = &resultContainer; + + /// Start the worker threads + // - InputMess::run() is started first on its own thread. + // - Comp::init() is called after a one-second pause. + // - Comp::run() then executes on a second thread.
+ + SharedQueue shared_queue; + InputMess inputMess(shared_queue, resultContainerPtr); + Comp comp(shared_queue, resultContainerPtr); + + std::thread input_thread([&inputMess] { inputMess.run(); }); + std::this_thread::sleep_for(std::chrono::seconds(1)); + comp.init(); + std::thread comp_thread([&comp] { comp.run(); }); + + input_thread.join(); + comp_thread.join(); + + // std::cout << "\n\033[33;1m[Input]\033[0m " << inputStr << std::endl; + + return 0; +} diff --git a/examples/SplitLlama/llama-mha-0.cpp b/examples/SplitLlama/llama-mha-0.cpp new file mode 100644 index 00000000..fae459f7 --- /dev/null +++ b/examples/SplitLlama/llama-mha-0.cpp @@ -0,0 +1,35 @@ +//===- llama-mha-0.cpp ----------------------------------------------------===// +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License.
+// +//===----------------------------------------------------------------------===// +#include "llamaMHA.h" +// ----------------------------------------------------------------------------- +// LLaMA MHA node entry (MHAMess1): messaging thread plus compute thread +// ----------------------------------------------------------------------------- + +int main() { + SharedQueue shared_queue; + MHAMess mahMess("MHAMess1", shared_queue, 9005, "ws://localhost:9001", "ws://localhost:9002", "ws://localhost:9003"); + Comp comp(shared_queue, "1"); + + std::thread mha_thread([&mahMess] { mahMess.run(); }); + std::this_thread::sleep_for(std::chrono::seconds(1)); + comp.init(); + std::thread comp_thread([&comp] { comp.run(); }); + + mha_thread.join(); + comp_thread.join(); + + return 0; +} diff --git a/examples/SplitLlama/llama-mha.cpp b/examples/SplitLlama/llama-mha.cpp new file mode 100644 index 00000000..05c0d176 --- /dev/null +++ b/examples/SplitLlama/llama-mha.cpp @@ -0,0 +1,36 @@ +//===- llama-mha.cpp ------------------------------------------------------===// +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License.
+// +//===----------------------------------------------------------------------===// +#include "llamaMHA.h" +// ----------------------------------------------------------------------------- +// LLaMA MHA node entry (MHAMess0): messaging thread plus compute thread +// ----------------------------------------------------------------------------- + +int main() { + + SharedQueue shared_queue; + MHAMess mahMess("MHAMess0", shared_queue, 9004, "ws://localhost:9001", "ws://localhost:9002", "ws://localhost:9003"); + Comp comp(shared_queue, "0"); + + std::thread mha_thread([&mahMess] { mahMess.run(); }); + std::this_thread::sleep_for(std::chrono::seconds(1)); + comp.init(); + std::thread comp_thread([&comp] { comp.run(); }); + + mha_thread.join(); + comp_thread.join(); + + return 0; +} diff --git a/examples/SplitLlama/llama-mlp-0.cpp b/examples/SplitLlama/llama-mlp-0.cpp new file mode 100644 index 00000000..71e7dd7f --- /dev/null +++ b/examples/SplitLlama/llama-mlp-0.cpp @@ -0,0 +1,36 @@ +//===- llama-mlp-0.cpp ----------------------------------------------------===// +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License.
+// +//===----------------------------------------------------------------------===// +#include "llamaMLP.h" +// ----------------------------------------------------------------------------- +// LLaMA MLP node entry (MLPMess1): messaging thread plus compute thread +// ----------------------------------------------------------------------------- + +int main() { + SharedQueue shared_queue; + MLPMess mlpMess("MLPMess1", shared_queue, 9011, "ws://localhost:9008", "ws://localhost:9009"); + Comp comp(shared_queue, "1"); + + std::thread mlp_thread([&mlpMess] { mlpMess.run(); }); + + std::this_thread::sleep_for(std::chrono::seconds(1)); + comp.init(); + std::thread comp_thread([&comp] { comp.run(); }); + + mlp_thread.join(); + comp_thread.join(); + + return 0; +} diff --git a/examples/SplitLlama/llama-mlp.cpp b/examples/SplitLlama/llama-mlp.cpp new file mode 100644 index 00000000..d90e790c --- /dev/null +++ b/examples/SplitLlama/llama-mlp.cpp @@ -0,0 +1,37 @@ +//===- llama-mlp.cpp ------------------------------------------------------===// +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License.
+// +//===----------------------------------------------------------------------===// +#include "llamaMLP.h" +// ----------------------------------------------------------------------------- +// LLaMA MLP node entry (MLPMess0): messaging thread plus compute thread +// ----------------------------------------------------------------------------- + +int main() { + + SharedQueue shared_queue; + MLPMess mlpMess("MLPMess0", shared_queue, 9010, "ws://localhost:9008", "ws://localhost:9009"); + Comp comp(shared_queue, "0"); + + std::thread mlp_thread([&mlpMess] { mlpMess.run(); }); + + std::this_thread::sleep_for(std::chrono::seconds(1)); + comp.init(); + std::thread comp_thread([&comp] { comp.run(); }); + + mlp_thread.join(); + comp_thread.join(); + + return 0; +} diff --git a/examples/SplitLlama/llama-output.cpp b/examples/SplitLlama/llama-output.cpp new file mode 100644 index 00000000..5de7f143 --- /dev/null +++ b/examples/SplitLlama/llama-output.cpp @@ -0,0 +1,35 @@ +//===- llama-output.cpp ---------------------------------------------------===// +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License.
+// +//===----------------------------------------------------------------------===// +#include "llamaOutput.h" +// ----------------------------------------------------------------------------- +// LLaMA output node entry: OutputMess messaging thread plus compute thread +// ----------------------------------------------------------------------------- + +int main() { + SharedQueue shared_queue; + OutputMess outputMess(shared_queue, "ws://localhost:9012", "ws://localhost:9013", "ws://localhost:9001"); + Comp comp(shared_queue); + + std::thread output_thread([&outputMess] { outputMess.run(); }); + std::this_thread::sleep_for(std::chrono::seconds(1)); + comp.init(); + std::thread comp_thread([&comp] { comp.run(); }); + + output_thread.join(); + comp_thread.join(); + + return 0; +} diff --git a/examples/SplitLlama/llama-rms-mha-0.cpp b/examples/SplitLlama/llama-rms-mha-0.cpp new file mode 100644 index 00000000..46ea8669 --- /dev/null +++ b/examples/SplitLlama/llama-rms-mha-0.cpp @@ -0,0 +1,35 @@ +//===- llama-rms-mha-0.cpp ------------------------------------------------===// +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License.
+// +//===----------------------------------------------------------------------===// +#include "llamaRMS.h" +// ----------------------------------------------------------------------------- +// LLaMA RMS node entry (RMSMess1, port 9003): messaging plus compute thread +// ----------------------------------------------------------------------------- + +int main() { + SharedQueue shared_queue; + RMSMess rmsMess("RMSMess1", shared_queue, 9003, "ws://localhost:9001"); + Comp comp(shared_queue, 1); + + std::thread rms_thread([&rmsMess] { rmsMess.run(); }); + std::this_thread::sleep_for(std::chrono::seconds(1)); + comp.init(); + std::thread comp_thread([&comp] { comp.run(); }); + + rms_thread.join(); + comp_thread.join(); + + return 0; +} diff --git a/examples/SplitLlama/llama-rms-mha.cpp b/examples/SplitLlama/llama-rms-mha.cpp new file mode 100644 index 00000000..7a4c0414 --- /dev/null +++ b/examples/SplitLlama/llama-rms-mha.cpp @@ -0,0 +1,35 @@ +//===- llama-rms-mha.cpp --------------------------------------------------===// +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License.
+// +//===----------------------------------------------------------------------===// +#include "llamaRMS.h" +// ----------------------------------------------------------------------------- +// LLaMA RMS node entry (RMSMess0, port 9002): messaging plus compute thread +// ----------------------------------------------------------------------------- + +int main() { + SharedQueue shared_queue; + RMSMess rmsMess("RMSMess0", shared_queue, 9002, "ws://localhost:9001"); + Comp comp(shared_queue, 1); + + std::thread rms_thread([&rmsMess] { rmsMess.run(); }); + std::this_thread::sleep_for(std::chrono::seconds(1)); + comp.init(); + std::thread comp_thread([&comp] { comp.run(); }); + + rms_thread.join(); + comp_thread.join(); + + return 0; +} diff --git a/examples/SplitLlama/llama-rms-mlp-0.cpp b/examples/SplitLlama/llama-rms-mlp-0.cpp new file mode 100644 index 00000000..adba253f --- /dev/null +++ b/examples/SplitLlama/llama-rms-mlp-0.cpp @@ -0,0 +1,35 @@ +//===- llama-rms-mlp-0.cpp ------------------------------------------------===// +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License.
+// +//===----------------------------------------------------------------------===// +#include "llamaRMS.h" +// ----------------------------------------------------------------------------- +// LLaMA RMS node entry (RMSMess1, port 9009): messaging plus compute thread +// ----------------------------------------------------------------------------- + +int main() { + SharedQueue shared_queue; + RMSMess rmsMess("RMSMess1", shared_queue, 9009, "ws://localhost:9007"); + Comp comp(shared_queue, 0); + + std::thread rms_thread([&rmsMess] { rmsMess.run(); }); + std::this_thread::sleep_for(std::chrono::seconds(1)); + comp.init(); + std::thread comp_thread([&comp] { comp.run(); }); + + rms_thread.join(); + comp_thread.join(); + + return 0; +} diff --git a/examples/SplitLlama/llama-rms-mlp.cpp b/examples/SplitLlama/llama-rms-mlp.cpp new file mode 100644 index 00000000..faa266dd --- /dev/null +++ b/examples/SplitLlama/llama-rms-mlp.cpp @@ -0,0 +1,35 @@ +//===- llama-rms-mlp.cpp --------------------------------------------------===// +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License.
+// +//===----------------------------------------------------------------------===// +#include "llamaRMS.h" +// ----------------------------------------------------------------------------- +// LLaMA RMS node entry (RMSMess0, port 9008): messaging plus compute thread +// ----------------------------------------------------------------------------- + +int main() { + SharedQueue shared_queue; + RMSMess rmsMess("RMSMess0", shared_queue, 9008, "ws://localhost:9006"); + Comp comp(shared_queue, 0); + + std::thread rms_thread([&rmsMess] { rmsMess.run(); }); + std::this_thread::sleep_for(std::chrono::seconds(1)); + comp.init(); + std::thread comp_thread([&comp] { comp.run(); }); + + rms_thread.join(); + comp_thread.join(); + + return 0; +} diff --git a/examples/SplitLlama/llamaAdd.h b/examples/SplitLlama/llamaAdd.h new file mode 100644 index 00000000..df0b38d7 --- /dev/null +++ b/examples/SplitLlama/llamaAdd.h @@ -0,0 +1,432 @@ +#ifndef LLAMAAdd_H // Include guard: keeps llamaAdd.h from being included twice +#define LLAMAAdd_H +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +using namespace buddy; +using websocketpp::lib::bind; +using websocketpp::lib::placeholders::_1; +using websocketpp::lib::placeholders::_2; + +typedef websocketpp::server server; +typedef websocketpp::client client; + +constexpr size_t MaxVocabSize = 32000; +constexpr size_t MaxTokenLength = 40; +constexpr size_t SubMaxTokenLength = 20; +constexpr size_t HiddenSize = 4096; +constexpr size_t HiddenSize0 = 128; +constexpr size_t HiddenSize1 = 41; + +/// Declare LLaMA forward function.
+extern "C" void _mlir_ciface_forward3(MemRef *, MemRef *, + MemRef *); + +// Shared-memory structure: four mutex-guarded queues (input, input0, input1, output); each pop_* blocks until its queue is non-empty +class SharedQueue { +public: + void push_input(const MemRef &data) { + std::lock_guard lock(inputMutex); + inputQueue.push(data); + input_cv.notify_one(); + } + + MemRef pop_input() { + std::unique_lock lock(inputMutex); + input_cv.wait(lock, [this] { return !inputQueue.empty(); }); + auto data = inputQueue.front(); + inputQueue.pop(); + return data; + } + + void push_input0(const MemRef &data) { + std::lock_guard lock(input0Mutex); + input0Queue.push(data); + input0_cv.notify_one(); + } + + MemRef pop_input0() { + std::unique_lock lock(input0Mutex); + input0_cv.wait(lock, [this] { return !input0Queue.empty(); }); + auto data = input0Queue.front(); + input0Queue.pop(); + return data; + } + + void push_input1(const MemRef &data) { + std::lock_guard lock(input1Mutex); + input1Queue.push(data); + input1_cv.notify_one(); + } + + MemRef pop_input1() { + std::unique_lock lock(input1Mutex); + input1_cv.wait(lock, [this] { return !input1Queue.empty(); }); + auto data = input1Queue.front(); + input1Queue.pop(); + return data; + } + + void push_output(const MemRef &data) { + std::lock_guard lock(outputMutex); + outputQueue.push(data); + output_cv.notify_one(); + } + + MemRef pop_output() { + std::unique_lock lock(outputMutex); + output_cv.wait(lock, [this] { return !outputQueue.empty(); }); + auto data = outputQueue.front(); + outputQueue.pop(); + return data; + } + +private: + std::queue> inputQueue; + std::queue> input0Queue; + std::queue> input1Queue; + std::queue> outputQueue; + std::mutex inputMutex; + std::mutex input0Mutex; + std::mutex input1Mutex; + std::mutex outputMutex; + std::condition_variable input_cv; + std::condition_variable input0_cv; + std::condition_variable input1_cv; + std::condition_variable output_cv; +}; + +//--------------------- AddMess (main thread) --------------------- +class AddMess { +public: + AddMess(const std::string name, bool isLast, SharedQueue &queue,
+ const uint16_t &port, const std::string &uri0, + const std::string &uri1, const std::string &uri2, + const std::string &uri3) + : addServer(), name(name), sharedQueue(queue), hdlsSymbol(), + isLast(isLast), + resultContainer(MemRef({1, SubMaxTokenLength, HiddenSize})) { + /// Server initialization + addServer.set_access_channels(websocketpp::log::alevel::none); + addServer.clear_access_channels(websocketpp::log::alevel::all); + addServer.init_asio(); + + // addServer.set_close_handler([this](websocketpp::connection_hdl hdl) { + // std::lock_guard lock(symbolMutex); // lock to protect the symbol table + // auto it = connections.find(hdl); + // if (it != connections.end()) { + // std::string user_id = it->second; + // hdlsSymbol.erase(user_id); + // connections.erase(hdl); + // } + // }); + + addServer.set_message_handler( + bind(&AddMess::on_server_message, this, _1, _2)); + // The reuse-address option is applied when listen() opens the acceptor, so it must be set first. + addServer.set_reuse_addr(true); + addServer.listen(port); + addServer.start_accept(); + + /// Client initialization + rmsClient.set_access_channels(websocketpp::log::alevel::none); + rmsClient.clear_access_channels(websocketpp::log::alevel::all); + rmsClient.init_asio(); + rmsClient.set_open_handler([this, name](websocketpp::connection_hdl hdl) { + // Announce this node's name through the endpoint that owns the handle. + rmsClient.send(hdl, name, websocketpp::frame::opcode::text); + }); + rmsClient.set_message_handler( + bind(&AddMess::on_rmsClient_message, this, _1, _2)); + websocketpp::lib::error_code rmsec; + auto rmscon = rmsClient.get_connection(uri0, rmsec); + rmsClient.connect(rmscon); + + mhaClient.set_access_channels(websocketpp::log::alevel::none); + mhaClient.clear_access_channels(websocketpp::log::alevel::all); + mhaClient.init_asio(); + mhaClient.set_open_handler([this, name](websocketpp::connection_hdl hdl) { + mhaClient.send(hdl, name, websocketpp::frame::opcode::text); + }); + mhaClient.set_message_handler( + bind(&AddMess::on_mhaClient_message, this, _1, _2)); + websocketpp::lib::error_code mhaec; + auto mhacon = mhaClient.get_connection(uri1, mhaec); + mhaClient.connect(mhacon); + +
mhaClient0.set_access_channels(websocketpp::log::alevel::none); + mhaClient0.clear_access_channels(websocketpp::log::alevel::all); + mhaClient0.init_asio(); + mhaClient0.set_open_handler([this, name](websocketpp::connection_hdl hdl) { + mhaClient0.send(hdl, name, websocketpp::frame::opcode::text); + }); + mhaClient0.set_message_handler( + bind(&AddMess::on_mhaClient0_message, this, _1, _2)); + websocketpp::lib::error_code mhaec0; + auto mhacon0 = mhaClient0.get_connection(uri2, mhaec0); + mhaClient0.connect(mhacon0); + + if (isLast) { + rmsClient0.set_access_channels(websocketpp::log::alevel::none); + rmsClient0.clear_access_channels(websocketpp::log::alevel::all); + rmsClient0.init_asio(); + rmsClient0.set_open_handler([this](websocketpp::connection_hdl hdl) { + rmsClient0.send(hdl, "LastAdd", websocketpp::frame::opcode::text); + std::lock_guard lock(symbolMutex); // lock to protect the symbol table + hdlsSymbol["FirstRMS"] = hdl; + connections[hdl] = "FirstRMS"; + std::cout << "已连接到RMSServer" << std::endl; + }); + websocketpp::lib::error_code rmsec0; + auto rmscon0 = rmsClient0.get_connection(uri3, rmsec0); + rmsClient0.connect(rmscon0); + } + } + + void run() { + std::thread rmsClient_thread([this]() { rmsClient.run(); }); + std::thread mhaClient_thread([this]() { mhaClient.run(); }); + std::thread mhaClient0_thread([this]() { mhaClient0.run(); }); + // Start the WebSocket server thread + std::thread server_thread([this]() { addServer.run(); }); + std::thread rmsClient0_thread; + + // Start the output-forwarding thread, which sends each result to OutputMess or to an RMS node + std::thread output_thread([this]() { + while (true) { + resultContainer = sharedQueue.pop_output(); + std::lock_guard lock(symbolMutex); // lock to protect the symbol table + if (isLast) { + if (tfCount == 31) { + auto it = hdlsSymbol.find("OutputMess"); + if (it != hdlsSymbol.end()) { + send_data(hdlsSymbol["OutputMess"], dataId++, + {resultContainer.getDataVector()}); + tfCount = 0; + std::cout << "一次Token推理完成." 
<< std::endl; + } else { + std::cout << "OutputMess未连接, 转发失败" << std::endl; + } + } else if (tfCount < 31) { + auto it = hdlsSymbol.find("FirstRMS"); + if (it != hdlsSymbol.end()) { + send_data(hdlsSymbol["FirstRMS"], dataId++, + {resultContainer.getDataVector()}); + tfCount++; + std::cout << "第" << tfCount << "次transformer层推理完成." + << std::endl; + } else { + std::cout << "未连接FirstRMS, 转发失败" << std::endl; + } + } else { + std::cout << "transformer层推理次数过多" << std::endl; + } + } else { + auto it = hdlsSymbol.find("RMSMess"); + if (it != hdlsSymbol.end()) { + send_data(hdlsSymbol["RMSMess"], dataId++, + {resultContainer.getDataVector()}); + std::cout << "转发成功 " << std::endl; + } else { + std::cout << "RMSMess未连接, 转发失败" << std::endl; + } + } + } + }); + if (isLast) { + rmsClient0_thread = std::thread([this]() { rmsClient0.run(); }); + } + rmsClient_thread.join(); + mhaClient_thread.join(); + mhaClient0_thread.join(); + server_thread.join(); + output_thread.join(); + // rmsClient0_thread is only started when isLast is set; joining a thread that was never started throws std::system_error. + if (rmsClient0_thread.joinable()) rmsClient0_thread.join(); + } + +private: + server addServer; + client rmsClient; + client rmsClient0; + client mhaClient; + client mhaClient0; + const std::string name; + SharedQueue &sharedQueue; + std::map hdlsSymbol; + std::map> + connections; + websocketpp::connection_hdl firstRMSHdl; + std::mutex symbolMutex; // mutex protecting hdlsSymbol + MemRef resultContainer; + // Atomic so that operations on dataId are indivisible + std::atomic dataId = 0; + std::mutex dataMutex; + // Number of Transformer-layer computations completed so far + uint32_t tfCount = 0; + // Whether this is the last add module + bool isLast; + + // NOTE(review): the "FirstRMS" handle is created by rmsClient0 but is sent through addServer here - confirm this is valid. + void send_data(websocketpp::connection_hdl hdl, uint32_t dataId, + const std::vector> &data) { + const uint8_t total = data.size(); + if (addServer.get_con_from_hdl(hdl)->get_state() != + websocketpp::session::state::open) + return; + + for (uint8_t i = 0; i < total; ++i) { + const auto &subdata = data[i]; + + // Build the protocol header + std::vector packet(10); // 10-byte header: 4 (dataId) + 1 (total) + 1 (index) + 4 (element count) + memcpy(packet.data(), &dataId, 4); + packet[4] = total; + packet[5] = i; + uint32_t num = subdata.size();
memcpy(packet.data() + 6, &num, 4); + + // Append the float payload + const uint8_t *binaryData = + reinterpret_cast(subdata.data()); + packet.insert(packet.end(), binaryData, + binaryData + subdata.size() * sizeof(float)); + + addServer.send(hdl, packet.data(), packet.size(), + websocketpp::frame::opcode::binary); + } + } + + void on_server_message(websocketpp::connection_hdl hdl, + server::message_ptr msg) { + std::string payload = msg->get_payload(); + if (payload.find("RMSMess") != std::string::npos) { + std::lock_guard lock(symbolMutex); // lock to protect the symbol table + hdlsSymbol["RMSMess"] = hdl; + connections[hdl] = payload; + std::cout << payload << " 已连接" << std::endl; + return; + } else if (payload.find("OutputMess") != std::string::npos) { + std::lock_guard lock(symbolMutex); // lock to protect the symbol table + // Register under the fixed key that run() looks up, as the RMSMess branch does. + hdlsSymbol["OutputMess"] = hdl; + connections[hdl] = payload; + std::cout << payload << " 已连接" << std::endl; + return; + } + } + + std::vector getFloatData(client::message_ptr msg) { + if (msg->get_opcode() != websocketpp::frame::opcode::binary) { + std::cout << "忽略非二进制消息" << std::endl; + return {}; + } + + const std::string &payload = msg->get_payload(); + if (payload.size() < 10) { + std::cerr << "错误: 协议头不完整(需要至少10字节)" << std::endl; + return {}; + } + + // Parse the protocol header + uint32_t batch_id; + uint8_t totalChunks, seqChunk; + uint32_t num_elements; + + memcpy(&batch_id, payload.data(), 4); + totalChunks = payload[4]; + seqChunk = payload[5]; + memcpy(&num_elements, payload.data() + 6, 4); + + // Validate the chunk sequence number + if (seqChunk >= totalChunks) { + std::cerr << "错误:非法分块序号 " << (int)seqChunk + << " (总块数=" << (int)totalChunks << ")" << std::endl; + return {}; + } + + // Validate the payload length + const size_t expectedSize = 10 + num_elements * sizeof(float); + if (payload.size() != expectedSize) { + std::cerr << "错误:数据长度不匹配(预期=" << expectedSize + << " 实际=" << payload.size() << ")" << std::endl; + return {}; + } + + // Extract the float payload + // NOTE(review): payload.data() + 10 is not guaranteed to be float-aligned - confirm this is acceptable on the target. + const float *float_data = + reinterpret_cast(payload.data() + 10); + std::vector chunk(float_data, float_data + 
num_elements); + return chunk; + } + + void on_mhaClient_message(websocketpp::connection_hdl hdl, + client::message_ptr msg) { + std::lock_guard lock(dataMutex); + auto chunk = getFloatData(msg); + // getFloatData() returns an empty vector for rejected messages; skip any + // chunk too short for the fixed MemRef shape built below. + if (chunk.size() < SubMaxTokenLength * HiddenSize) + return; + intptr_t sizes[2] = {SubMaxTokenLength, HiddenSize}; + MemRef subResultContainer(chunk.data(), sizes); + sharedQueue.push_input0(subResultContainer); + std::cout << "接收到MHAMess0数据" << std::endl; + } + + void on_mhaClient0_message(websocketpp::connection_hdl hdl, + client::message_ptr msg) { + std::lock_guard lock(dataMutex); + auto chunk = getFloatData(msg); + // Same guard as above: never build a MemRef from a short or empty chunk. + if (chunk.size() < SubMaxTokenLength * HiddenSize) + return; + intptr_t sizes[2] = {SubMaxTokenLength, HiddenSize}; + MemRef subResultContainer(chunk.data(), sizes); + sharedQueue.push_input1(subResultContainer); + std::cout << "接收到MHAMess1数据" << std::endl; + } + + void on_rmsClient_message(websocketpp::connection_hdl hdl, + client::message_ptr msg) { + std::lock_guard lock(dataMutex); + auto chunk = getFloatData(msg); + // Same guard as above: never build a MemRef from a short or empty chunk. + if (chunk.size() < SubMaxTokenLength * HiddenSize) + return; + intptr_t sizes[3] = {1, SubMaxTokenLength, HiddenSize}; + MemRef subResultContainer(chunk.data(), sizes); + sharedQueue.push_input(subResultContainer); + std::cout << "接收到RMSMess数据" << std::endl; + } +}; + +//--------------------- Comp (worker thread) --------------------- +class Comp { +public: + Comp(SharedQueue &queue) : sharedQueue(queue) {} + + void run() { + while (true) { + MemRef input1 = sharedQueue.pop_input1(); + MemRef input2 = sharedQueue.pop_input(); + MemRef input0 = sharedQueue.pop_input0(); + input0.addMemRef(input0, input1); + MemRef resultContainer({1, SubMaxTokenLength, HiddenSize}); + _mlir_ciface_forward3(&resultContainer, &input0, &input2); + std::cout << "forward3 computed." 
<< std::endl; + sharedQueue.push_output(resultContainer); + } + } + +private: + SharedQueue &sharedQueue; +}; + +#endif // LLAMAAdd_H diff --git a/examples/SplitLlama/llamaInput.h b/examples/SplitLlama/llamaInput.h new file mode 100644 index 00000000..7ac3dcbe --- /dev/null +++ b/examples/SplitLlama/llamaInput.h @@ -0,0 +1,343 @@ +//===- llamaInput.h -------------------------------------------------------===// +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +//===----------------------------------------------------------------------===// +#pragma once // This header defines types and constants, so it must not be included twice. +#include +#include +#include +#include +#include "BaseDisModel.h" +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +using namespace buddy; +using websocketpp::lib::bind; +using websocketpp::lib::placeholders::_1; +using websocketpp::lib::placeholders::_2; + +typedef websocketpp::server server; +typedef websocketpp::client client; + +constexpr size_t MaxVocabSize = 32000; +constexpr size_t MaxTokenLength = 40; +constexpr size_t SubMaxTokenLength = 20; +constexpr size_t HiddenSize = 4096; +constexpr size_t HiddenSize0 = 128; +constexpr size_t HiddenSize1 = 41; +constexpr size_t ParamSize = 131072064; + +// Bundle of the four MemRefs passed as the first argument of _mlir_ciface_forward0. +struct MemRefContainer { + MemRef memRef3D0; + MemRef memRef2D; + MemRef memRef3D1; + MemRef memRef3D2; + + MemRefContainer(MemRef m1, MemRef m2, MemRef m3, + MemRef m4) + : 
memRef3D0(std::move(m1)), memRef2D(std::move(m2)), + memRef3D1(std::move(m3)), memRef3D2(std::move(m4)) {} +}; + +/// Declare LLaMA forward function. +extern "C" void _mlir_ciface_forward0(MemRefContainer *, MemRef *, + Text *); + +/// Capture input message. +void getUserInput(std::string &inputStr) { + std::cout << "\nPlease send a message:" << std::endl; + std::cout << ">>> "; + getline(std::cin, inputStr); + std::cout << std::endl; +} + +/// Print [Log] label in bold blue format. +void printLogLabel() { std::cout << "\033[34;1m[Log] \033[0m"; } + +/// Print information for each iteration. +void printIterInfo(size_t iterIdx, std::string str) { + std::cout << "\033[32;1m[Iteration " << iterIdx << "] \033[0m"; + std::cout << "Token: " << str << std::endl; +} + +/// Tokenize input data in the container. +void tokenizeInput(const std::string &vocabFile, + Text &inputContainer) { + printLogLabel(); + std::cout << "Vocab file: " << std::filesystem::canonical(vocabFile) + << std::endl; + const auto buddyTokenizeStart = std::chrono::high_resolution_clock::now(); + inputContainer.tokenizeLlama(vocabFile, MaxTokenLength); + const auto buddyTokenizeEnd = std::chrono::high_resolution_clock::now(); + const std::chrono::duration buddyTokenizeTime = + buddyTokenizeEnd - buddyTokenizeStart; + printLogLabel(); + std::cout << "Tokenize time: " << buddyTokenizeTime.count() << "ms" + << std::endl; +} + +// 共享内存结构(线程安全队列) +class SharedQueue { +public: + void push_input(const std::any &data) { + std::lock_guard lock(input_mutex); + input_queue.push(data); + input_cv.notify_one(); + } + + std::any pop_input() { + std::unique_lock lock(input_mutex); + input_cv.wait(lock, [this] { return !input_queue.empty(); }); + auto data = input_queue.front(); + input_queue.pop(); + return data; + } + + void push_output(const std::any &data) { + std::lock_guard lock(output_mutex); + output_queue.push(data); + output_cv.notify_one(); + } + + std::any pop_output() { + std::unique_lock 
lock(output_mutex); + output_cv.wait(lock, [this] { return !output_queue.empty(); }); + auto data = output_queue.front(); + output_queue.pop(); + return data; + } + +private: + std::queue input_queue; + std::queue output_queue; + std::mutex input_mutex; + std::mutex output_mutex; + std::condition_variable input_cv; + std::condition_variable output_cv; +}; + +//--------------------- InputMess (主线程) --------------------- +class InputMess { +public: + InputMess(SharedQueue &queue, MemRefContainer *resultContainerPtr) + : inputServer(), shared_queue(queue), hdlsSymbol(), inputContainer(), + resultContainerPtr(resultContainerPtr), + memRef3D0(MemRef({1, MaxTokenLength, HiddenSize})), + memRef2D(MemRef({MaxTokenLength, HiddenSize1})), + memRef3D1(MemRef({1, MaxTokenLength, HiddenSize0})), + memRef3D2(MemRef({1, MaxTokenLength, HiddenSize0})), + subResultContainer0( + MemRef({1, SubMaxTokenLength, HiddenSize})), + subResultContainer1( + MemRef({1, SubMaxTokenLength, HiddenSize})), + dataId(0) { + inputServer.set_access_channels(websocketpp::log::alevel::none); + inputServer.clear_access_channels(websocketpp::log::alevel::all); + inputServer.init_asio(); + + inputServer.set_message_handler( + bind(&InputMess::on_server_message, this, _1, _2)); + inputServer.set_reuse_addr(true); + inputServer.listen(9001); + inputServer.start_accept(); + } + + void run() { + // 启动 WebSocket 服务器线程 + std::thread server_thread([this]() { inputServer.run(); }); + + // 新增:启动输出监听线程,向RMSMess发送数据 + std::thread output_thread([this]() { + while (true) { + resultContainerPtr = + std::any_cast(shared_queue.pop_output()); + memRef3D0 = resultContainerPtr->memRef3D0; + memRef2D = resultContainerPtr->memRef2D; + memRef3D1 = resultContainerPtr->memRef3D1; + memRef3D2 = resultContainerPtr->memRef3D2; + memRef3D0.splitMemRef(std::move(memRef3D0), subResultContainer0, + subResultContainer1, 1, 20); + + std::lock_guard lock(symbolMutex); // 加锁保护符号表 + auto it = hdlsSymbol.find("RMSMess0"); + if (it != 
hdlsSymbol.end()) { + try { + send_data(hdlsSymbol["RMSMess0"], dataId++, + {subResultContainer0.getDataVector()}); + send_data(hdlsSymbol["RMSMess1"], dataId++, + {subResultContainer1.getDataVector()}); + std::cout << "成功向RMSMess发送数据" << std::endl; + send_data(hdlsSymbol["MHAMess0"], dataId++, + {memRef2D.getDataVector(), memRef3D1.getDataVector(), + memRef3D2.getDataVector()}); + send_data(hdlsSymbol["MHAMess1"], dataId++, + {memRef2D.getDataVector(), memRef3D1.getDataVector(), + memRef3D2.getDataVector()}); + std::cout << "成功向MHAMess发送数据" << std::endl; + } catch (const websocketpp::exception &e) { + std::cout << "转发失败: " << e.what() << std::endl; + } + } else { + std::cout << "RMSMess未连接, 丢弃结果: " << "result" << std::endl; + } + } + }); + + server_thread.join(); + output_thread.join(); + } + +private: + server inputServer; + SharedQueue &shared_queue; + std::map hdlsSymbol; + std::map> + connections; + std::mutex symbolMutex; // 保护 hdlsSymbol 的互斥锁 + std::mutex hdlMutex; + Text inputContainer; + MemRef memRef3D0; + MemRef memRef2D; + MemRef memRef3D1; + MemRef memRef3D2; + MemRefContainer *resultContainerPtr; + MemRef subResultContainer0; + MemRef subResultContainer1; + + /// Define directories of vacabulary and file. 
+ std::string llamaDir = LLAMA_SPLIT_EXAMPLE_PATH; + const std::string vocabDir = llamaDir + "/vocab.txt"; + + // 确保对dataId的操作是​​原子​​的 + std::atomic dataId; + + void send_data(websocketpp::connection_hdl hdl, uint32_t dataId, + const std::vector> &data) { + const uint8_t total = data.size(); + + if (inputServer.get_con_from_hdl(hdl)->get_state() != + websocketpp::session::state::open) + return; + + for (uint8_t i = 0; i < total; ++i) { + const auto &subdata = data[i]; + + // 构造协议头 + std::vector packet(10); // 4+1+1+2=8字节头 + memcpy(packet.data(), &dataId, 4); + packet[4] = total; + packet[5] = i; + uint32_t num = subdata.size(); + memcpy(packet.data() + 6, &num, 4); + + // 添加浮点数据 + const uint8_t *binaryData = + reinterpret_cast(subdata.data()); + packet.insert(packet.end(), binaryData, + binaryData + subdata.size() * sizeof(float)); + + inputServer.send(hdl, packet.data(), packet.size(), + websocketpp::frame::opcode::binary); + } + } + + void on_server_message(websocketpp::connection_hdl hdl, + server::message_ptr msg) { + std::string payload = msg->get_payload(); + if (payload.find("RMSMess") != std::string::npos || + payload.find("MHAMess") != std::string::npos) { + std::lock_guard lock(symbolMutex); // 加锁保护符号表 + hdlsSymbol[payload] = hdl; + connections[hdl] = payload; + std::cout << payload << " 已连接" << std::endl; + } else if (payload == "OutputMess") { + std::lock_guard lock(symbolMutex); // 加锁保护符号表 + hdlsSymbol[payload] = hdl; + connections[hdl] = payload; + std::cout << payload << " 已连接" << std::endl; + + // 获取用户输入 + std::string inputStr; + getUserInput(inputStr); + // 创建并tokenize输入容器 + inputContainer = Text(inputStr); + tokenizeInput(vocabDir, inputContainer); + // 将输入压入队列 + shared_queue.push_input(inputContainer); + int tokenCnt = inputContainer.getTokenCnt(); + inputServer.send(hdl, std::to_string(tokenCnt), + websocketpp::frame::opcode::text); + } else { + // 获取客户端类型 + int maxIndex = std::stoi(payload); + // Determine the generated token. 
+ int tokenIndex = inputContainer.getTokenCnt() - 1; + std::string tok = inputContainer.getStr(maxIndex); + printIterInfo(tokenIndex, tok); + + // Append the generated token into the input and output container. + inputContainer.appendTokenIdx(maxIndex); + + shared_queue.push_input(inputContainer); + } + } +}; + +//--------------------- Comp (子线程) --------------------- +class Comp { +public: + Comp(SharedQueue &queue, MemRefContainer *resultContainerPtr) + : shared_queue(queue), resultContainerPtr(resultContainerPtr), + paramsContainer({ParamSize}) { + + } + void init() { loadAllParameters(); } + void run() { + while (true) { + auto input = std::any_cast>(shared_queue.pop_input()); + _mlir_ciface_forward0(resultContainerPtr, ¶msContainer, &input); + shared_queue.push_output(resultContainerPtr); + std::cout << "forward0 computed." << std::endl; + } + } + +private: + SharedQueue &shared_queue; + MemRefContainer *resultContainerPtr; + MemRef paramsContainer; + + void loadAllParameters() { + /// Define directories of parameter file. 
+ std::string llamaBuildDir = LLAMA_EXAMPLE_BUILD_PATH; + std::string paramsDir = + llamaBuildDir + "/subgraph0_arg0.data"; // 权重文件路径 + BaseDisModel::loadParameters(paramsDir, paramsContainer); + } +}; diff --git a/examples/SplitLlama/llamaMHA.h b/examples/SplitLlama/llamaMHA.h new file mode 100644 index 00000000..26ed758b --- /dev/null +++ b/examples/SplitLlama/llamaMHA.h @@ -0,0 +1,474 @@ +#ifndef LLAMAMHA_H // 作用:防止llamaMHA.h被重复引用 +#define LLAMAMHA_H +#include +#include +#include +#include +#include "BaseDisModel.h" +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +using namespace buddy; +using websocketpp::lib::bind; +using websocketpp::lib::placeholders::_1; +using websocketpp::lib::placeholders::_2; + +typedef websocketpp::server server; +typedef websocketpp::client client; + +constexpr size_t MaxVocabSize = 32000; +constexpr size_t MaxTokenLength = 40; +constexpr size_t SubMaxTokenLength = 20; +constexpr size_t HiddenSize = 4096; +constexpr size_t HiddenSize0 = 128; +constexpr size_t HiddenSize1 = 41; + +/// Declare LLaMA forward function. 
+extern "C" void _mlir_ciface_forward2(MemRef *, MemRef *, + MemRef *, MemRef *, + MemRef *, MemRef *); + +// 共享内存结构(线程安全队列) +class SharedQueue { +public: + void push_input(const std::any &data) { + std::lock_guard lock(inputMutex); + inputQueue.push(data); + input_cv.notify_one(); + } + + std::any pop_input() { + std::unique_lock lock(inputMutex); + input_cv.wait(lock, [this] { return !inputQueue.empty(); }); + auto data = inputQueue.front(); + inputQueue.pop(); + return data; + } + + void push_input0(const MemRef &data) { + std::lock_guard lock(input0Mutex); + input0Queue.push(data); + input0_cv.notify_one(); + } + + MemRef pop_input0() { + std::unique_lock lock(input0Mutex); + input0_cv.wait(lock, [this] { return !input0Queue.empty(); }); + auto data = input0Queue.front(); + input0Queue.pop(); + return data; + } + + void push_input1(const MemRef &data) { + std::lock_guard lock(input1Mutex); + input1Queue.push(data); + input1_cv.notify_one(); + } + + MemRef pop_input1() { + std::unique_lock lock(input1Mutex); + input1_cv.wait(lock, [this] { return !input1Queue.empty(); }); + auto data = input1Queue.front(); + input1Queue.pop(); + return data; + } + + void push_output(const MemRef &data) { + std::lock_guard lock(outputMutex); + outputQueue.push(data); + output_cv.notify_one(); + } + + MemRef pop_output() { + std::unique_lock lock(outputMutex); + output_cv.wait(lock, [this] { return !outputQueue.empty(); }); + auto data = outputQueue.front(); + outputQueue.pop(); + return data; + } + + u_int32_t inputQueueSize() { + std::lock_guard lock(inputMutex); + return inputQueue.size(); + } + +private: + std::queue inputQueue; + std::queue> input0Queue; + std::queue> input1Queue; + std::queue> outputQueue; + std::mutex inputMutex; + std::mutex input0Mutex; + std::mutex input1Mutex; + std::mutex outputMutex; + std::condition_variable input_cv; + std::condition_variable input0_cv; + std::condition_variable input1_cv; + std::condition_variable output_cv; +}; + 
+//--------------------- MHAMess (主线程) --------------------- +class MHAMess { +public: + MHAMess(const std::string name, SharedQueue &queue, const uint16_t &port, + const std::string &uri0, const std::string &uri1, + const std::string &uri2) + : mhaServer(), name(name), sharedQueue(queue), hdlsSymbol(), + resultContainer(MemRef({MaxTokenLength, HiddenSize})), + dataId(0) { + /// 服务器初始化 + mhaServer.set_access_channels(websocketpp::log::alevel::none); + mhaServer.clear_access_channels(websocketpp::log::alevel::all); + mhaServer.init_asio(); + + // mhaServer.set_close_handler([this](websocketpp::connection_hdl hdl) { + // std::lock_guard lock(symbolMutex); // 加锁保护符号表 + // auto it = connections.find(hdl); + // if (it != connections.end()) { + // std::string user_id = it->second; + // hdlsSymbol.erase(user_id); + // connections.erase(hdl); + // } + // }); + + mhaServer.set_message_handler( + bind(&MHAMess::on_server_message, this, _1, _2)); + mhaServer.listen(port); + mhaServer.set_reuse_addr(true); + mhaServer.start_accept(); + + /// 客户端初始化 + // 禁用客户端日志 + rmsClient.set_access_channels(websocketpp::log::alevel::none); + rmsClient.clear_access_channels(websocketpp::log::alevel::all); + // 初始化服务器并绑定ioService + rmsClient.init_asio(); + // 设置服务器的消息回调 + rmsClient.set_open_handler([this, name](websocketpp::connection_hdl hdl) { + rmsClient.send(hdl, name, websocketpp::frame::opcode::text); + }); + rmsClient.set_message_handler( + bind(&MHAMess::on_rmsClient_message, this, _1, _2)); + // 启动连接 + websocketpp::lib::error_code rmsec; + auto rmscon = rmsClient.get_connection(uri1, rmsec); + rmsClient.connect(rmscon); + + // 禁用客户端日志 + rmsClient0.set_access_channels(websocketpp::log::alevel::none); + rmsClient0.clear_access_channels(websocketpp::log::alevel::all); + // 初始化服务器并绑定ioService + rmsClient0.init_asio(); + // 设置服务器的消息回调 + rmsClient0.set_open_handler([this, name](websocketpp::connection_hdl hdl) { + rmsClient0.send(hdl, name, websocketpp::frame::opcode::text); + }); + 
rmsClient0.set_message_handler( + bind(&MHAMess::on_rmsClient0_message, this, _1, _2)); + // 启动连接 + websocketpp::lib::error_code rmsec0; + auto rmscon0 = rmsClient0.get_connection(uri2, rmsec0); + rmsClient0.connect(rmscon0); + + // 禁用客户端日志 + inputClient.set_access_channels(websocketpp::log::alevel::none); + inputClient.clear_access_channels(websocketpp::log::alevel::all); + // 初始化服务器并绑定ioService + inputClient.init_asio(); + // 设置服务器的消息回调 + inputClient.set_open_handler([this, name](websocketpp::connection_hdl hdl) { + rmsClient.send(hdl, name, websocketpp::frame::opcode::text); + }); + inputClient.set_message_handler( + bind(&MHAMess::on_inputClient_message, this, _1, _2)); + // 启动连接 + websocketpp::lib::error_code inputec; + auto inputcon = inputClient.get_connection(uri0, inputec); + inputClient.connect(inputcon); + } + + void run() { + std::thread inputClient_thread([this]() { inputClient.run(); }); + std::thread rmsClient_thread([this]() { rmsClient.run(); }); + std::thread rmsClient0_thread([this]() { rmsClient0.run(); }); + // 启动 WebSocket 服务器线程 + std::thread server_thread([this]() { mhaServer.run(); }); + + // 新增:启动输出监听线程,向AddMess发送数据 + std::thread output_thread([this]() { + while (true) { + resultContainer = sharedQueue.pop_output(); + std::lock_guard lock(symbolMutex); // 加锁保护符号表 + MemRef subResultContainer0({SubMaxTokenLength, HiddenSize}); + MemRef subResultContainer1({SubMaxTokenLength, HiddenSize}); + resultContainer.splitMemRef(std::move(resultContainer), + subResultContainer0, subResultContainer1, 0, + 20); + auto it = hdlsSymbol.find("AddMess0"); + if (it != hdlsSymbol.end()) { + send_data(hdlsSymbol["AddMess0"], dataId++, + {subResultContainer0.getDataVector()}); + send_data(hdlsSymbol["AddMess1"], dataId++, + {subResultContainer1.getDataVector()}); + std::cout << "转发成功" << std::endl; + } else { + std::cout << "AddMess0未连接, 丢弃结果: " << "result" << std::endl; + } + } + }); + inputClient_thread.join(); + rmsClient_thread.join(); + 
rmsClient0_thread.join(); + server_thread.join(); + output_thread.join(); + } + +private: + server mhaServer; + client inputClient; + client rmsClient; + client rmsClient0; + const std::string name; + SharedQueue &sharedQueue; + std::map hdlsSymbol; + std::map> + connections; + std::mutex symbolMutex; // 保护 hdlsSymbol 的互斥锁 + MemRef resultContainer; + // 确保对dataId的操作是​​原子​​的 + std::atomic dataId; + std::mutex dataMutex; + std::mutex sequenceMutex; + // 存储每个参数的shape + std::vector> inputSizes = { + {MaxTokenLength, HiddenSize1}, + {1, MaxTokenLength, HiddenSize0}, + {1, MaxTokenLength, HiddenSize0}}; + // 表示最近从其他服务器得到的数据块在数据组内的序号 + uint8_t currentSequence; + + void send_data(websocketpp::connection_hdl hdl, uint32_t dataId, + const std::vector> &data) { + const uint8_t total = data.size(); + + if (mhaServer.get_con_from_hdl(hdl)->get_state() != + websocketpp::session::state::open) + return; + + for (uint8_t i = 0; i < total; ++i) { + const auto &subdata = data[i]; + + // 构造协议头 + std::vector packet(10); // 4+1+1+4=10字节头 + memcpy(packet.data(), &dataId, 4); + packet[4] = total; + packet[5] = i; + uint32_t num = subdata.size(); + memcpy(packet.data() + 6, &num, 4); + + // 添加浮点数据 + const uint8_t *binaryData = + reinterpret_cast(subdata.data()); + packet.insert(packet.end(), binaryData, + binaryData + subdata.size() * sizeof(float)); + + mhaServer.send(hdl, packet.data(), packet.size(), + websocketpp::frame::opcode::binary); + } + } + + std::vector getFloatData(client::message_ptr msg) { + if (msg->get_opcode() != websocketpp::frame::opcode::binary) { + std::cout << "忽略非二进制消息" << std::endl; + return {}; + } + + const std::string &payload = msg->get_payload(); + if (payload.size() < 10) { + std::cerr << "错误: 协议头不完整(需要至少10字节)" << std::endl; + return {}; + } + + // 解析协议头 + uint32_t batch_id; + uint8_t totalChunks, seqChunk; + uint32_t num_elements; + + memcpy(&batch_id, payload.data(), 4); + totalChunks = payload[4]; + { + std::lock_guard lock(sequenceMutex); + 
currentSequence = payload[5]; + } + memcpy(&num_elements, payload.data() + 6, 4); + + // 验证分块序号有效性 + if (currentSequence >= totalChunks) { + std::cerr << "错误:非法分块序号 " << (int)currentSequence + << " (总块数=" << (int)totalChunks << ")" << std::endl; + return {}; + } + + // 验证数据长度 + const size_t expectedSize = 10 + num_elements * sizeof(float); + if (payload.size() != expectedSize) { + std::cerr << "错误:数据长度不匹配(预期=" << expectedSize + << " 实际=" << payload.size() << ")" << std::endl; + return {}; + } + + // 提取浮点数据 + const float *float_data = + reinterpret_cast(payload.data() + 10); + std::vector chunk(float_data, float_data + num_elements); + return chunk; + } + + void on_server_message(websocketpp::connection_hdl hdl, + server::message_ptr msg) { + std::string payload = msg->get_payload(); + if (payload.find("AddMess") != std::string::npos) { + std::lock_guard lock(symbolMutex); // 加锁保护符号表 + hdlsSymbol[payload] = hdl; + connections[hdl] = payload; + std::cout << payload << " 已连接" << std::endl; + return; + } + } + + void on_rmsClient_message(websocketpp::connection_hdl, + client::message_ptr msg) { + std::lock_guard lock(dataMutex); + auto chunk = getFloatData(msg); + intptr_t sizes[3] = {1, SubMaxTokenLength, HiddenSize}; + MemRef subResultContainer(chunk.data(), sizes); + sharedQueue.push_input0(subResultContainer); + std::cout << "接收到RMSMess数据" << std::endl; + } + + void on_rmsClient0_message(websocketpp::connection_hdl, + client::message_ptr msg) { + std::lock_guard lock(dataMutex); + auto chunk = getFloatData(msg); + intptr_t sizes[3] = {1, SubMaxTokenLength, HiddenSize}; + MemRef subResultContainer(chunk.data(), sizes); + sharedQueue.push_input1(subResultContainer); + std::cout << "接收到RMSMess数据" << std::endl; + } + + void on_inputClient_message(websocketpp::connection_hdl, + client::message_ptr msg) { + std::lock_guard lock(dataMutex); + auto chunk = getFloatData(msg); + int sequence = (int)currentSequence; + + // 构造 MemRef 并推入队列 + if (sequence == 0) { + intptr_t 
sizes[2] = {MaxTokenLength, HiddenSize1}; + MemRef subResultContainer(chunk.data(), sizes); + sharedQueue.push_input(subResultContainer); + } else { + MemRef subResultContainer(chunk.data(), inputSizes[sequence]); + sharedQueue.push_input(subResultContainer); + } + std::cout << "接收到InputMess数据" << std::endl; + } +}; + +//--------------------- Comp (子线程) --------------------- +class Comp { +public: + Comp(SharedQueue &queue, const std::string splitNum = "0") + : sharedQueue(queue), splitNum(splitNum), + currentInput1(MemRef({MaxTokenLength, HiddenSize1})), + currentInput2(MemRef({1, MaxTokenLength, HiddenSize0})), + currentInput3(MemRef({1, MaxTokenLength, HiddenSize0})) {} + + void init() { loadAllParameters(); } + + void run() { + while (true) { + // 非阻塞检查是否有新输入 + if (!index) { + updateParams(); // 原子更新三个参数 + } + std::lock_guard lock(inputMutex); + MemRef rmsInput0 = sharedQueue.pop_input0(); + MemRef rmsInput1 = sharedQueue.pop_input1(); + MemRef input0({1, MaxTokenLength, HiddenSize}); + input0.concatenateMemRefs(rmsInput0, rmsInput1, input0, 1); + MemRef resultContainer({MaxTokenLength, HiddenSize}); + _mlir_ciface_forward2(&resultContainer, ¶msContainers[index], &input0, + ¤tInput2, ¤tInput3, ¤tInput1); + std::cout << "第" << index << "次forward2 computed." 
<< std::endl; + sharedQueue.push_output(resultContainer); + index = (index + 1) % 32; + } + } + +private: + SharedQueue &sharedQueue; + std::vector> paramsContainers; + uint32_t index = 0; + const std::string splitNum; + MemRef currentInput1; + MemRef currentInput2; + MemRef currentInput3; + std::mutex inputMutex; // 保护参数更新 + + void loadAllParameters() { + constexpr size_t paramSize_group[] = { + 131072064, 4096, 33554432, 0, 4096, 67633152, 0, 4096, 33554432, + 0, 4096, 67633152, 0, 4096, 33554432, 0, 4096, 67633152, + 0, 4096, 33554432, 0, 4096, 67633152, 0, 4096, 33554432, + 0, 4096, 67633152, 0, 4096, 33554432, 0, 4096, 67633152, + 0, 4096, 33554432, 0, 4096, 67633152, 0, 4096, 33554432, + 0, 4096, 67633152, 0, 4096, 33554432, 0, 4096, 67633152, + 0, 4096, 33554432, 0, 4096, 67633152, 0, 4096, 33554432, + 0, 4096, 67633152, 0, 4096, 33554432, 0, 4096, 67633152, + 0, 4096, 33554432, 0, 4096, 67633152, 0, 4096, 33554432, + 0, 4096, 67633152, 0, 4096, 33554432, 0, 4096, 67633152, + 0, 4096, 33554432, 0, 4096, 67633152, 0, 4096, 33554432, + 0, 4096, 67633152, 0, 4096, 33554432, 0, 4096, 67633152, + 0, 4096, 33554432, 0, 4096, 67633152, 0, 4096, 33554432, + 0, 4096, 67633152, 0, 4096, 33554432, 0, 4096, 67633152, + 0, 4096, 33554432, 0, 4096, 67633152, 0, 4096, 33554432, + 0, 4096, 67633152, 0, 4096, 33554432, 0, 4096, 67633152, + 0, 4096, 33554432, 0, 4096, 67633152, 0, 4096, 33554432, + 0, 4096, 67633152, 0, 4096, 33554432, 0, 4096, 67633152, + 0, 4096, 33554432, 0, 4096, 67633152, 0, 4096, 33554432, + 0, 4096, 67633152, 0, 4096, 33554432, 0, 4096, 67633152, + 0, 4096, 33554432, 0, 4096, 67633152, 0, 4096, 33554432, + 0, 4096, 67633152, 0, 131076096}; + size_t group_len = sizeof(paramSize_group) / sizeof(paramSize_group[0]); + BaseDisModel::getParameters(paramSize_group, group_len, 33554432, + splitNum, paramsContainers); + } + + void updateParams() { + std::lock_guard lock(inputMutex); + currentInput1 = std::any_cast>(sharedQueue.pop_input()); + currentInput2 = 
std::any_cast>(sharedQueue.pop_input()); + currentInput3 = std::any_cast>(sharedQueue.pop_input()); + std::cout << "额外参数已更新" << std::endl; + } +}; + +#endif // LLAMAMHA_H diff --git a/examples/SplitLlama/llamaMLP.h b/examples/SplitLlama/llamaMLP.h new file mode 100644 index 00000000..6fdfc507 --- /dev/null +++ b/examples/SplitLlama/llamaMLP.h @@ -0,0 +1,409 @@ +#ifndef LLAMAMLP_H // 作用:防止llamaMLP.h被重复引用 +#define LLAMAMLP_H +#include +#include +#include +#include +#include "BaseDisModel.h" +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +using namespace buddy; +using websocketpp::lib::bind; +using websocketpp::lib::placeholders::_1; +using websocketpp::lib::placeholders::_2; + +typedef websocketpp::server server; +typedef websocketpp::client client; + +constexpr size_t MaxVocabSize = 32000; +constexpr size_t MaxTokenLength = 40; +constexpr size_t SubMaxTokenLength = 20; +constexpr size_t HiddenSize = 4096; +constexpr size_t HiddenSize0 = 128; +constexpr size_t HiddenSize1 = 41; + +/// Declare LLaMA forward function. 
+extern "C" void _mlir_ciface_forward5(MemRef *, MemRef *, + MemRef *); + +// 共享内存结构(线程安全队列) +class SharedQueue { +public: + void push_input0(const MemRef &data) { + std::lock_guard lock(input0Mutex); + input0Queue.push(data); + input0_cv.notify_one(); + } + + MemRef pop_input0() { + std::unique_lock lock(input0Mutex); + input0_cv.wait(lock, [this] { return !input0Queue.empty(); }); + auto data = input0Queue.front(); + input0Queue.pop(); + return data; + } + + void push_input1(const MemRef &data) { + std::lock_guard lock(input1Mutex); + input1Queue.push(data); + input1_cv.notify_one(); + } + + MemRef pop_input1() { + std::unique_lock lock(input1Mutex); + input1_cv.wait(lock, [this] { return !input1Queue.empty(); }); + auto data = input1Queue.front(); + input1Queue.pop(); + return data; + } + + void push_output(const MemRef &data) { + std::lock_guard lock(outputMutex); + outputQueue.push(data); + output_cv.notify_one(); + } + + MemRef pop_output() { + std::unique_lock lock(outputMutex); + output_cv.wait(lock, [this] { return !outputQueue.empty(); }); + auto data = outputQueue.front(); + outputQueue.pop(); + return data; + } + +private: + std::queue> input0Queue; + std::queue> input1Queue; + std::queue> outputQueue; + std::mutex input0Mutex; + std::mutex input1Mutex; + std::mutex outputMutex; + std::condition_variable input0_cv; + std::condition_variable input1_cv; + std::condition_variable output_cv; +}; + +//--------------------- MLPMess (主线程) --------------------- +class MLPMess { +public: + MLPMess(const std::string name, SharedQueue &queue, const uint16_t &port, + const std::string &uri0, const std::string &uri1) + : mlpServer(), name(name), sharedQueue(queue), hdlsSymbol(), + resultContainer(MemRef({MaxTokenLength, HiddenSize})), + dataId(0) { + /// 服务器初始化 + mlpServer.set_access_channels(websocketpp::log::alevel::all); + mlpServer.clear_access_channels(websocketpp::log::alevel::all); + mlpServer.init_asio(); + + 
mlpServer.set_close_handler([this](websocketpp::connection_hdl hdl) { + std::lock_guard lock(symbolMutex); + auto it = connections.find(hdl); + if (it != connections.end()) { + std::string user_id = it->second; + hdlsSymbol.erase(user_id); + connections.erase(hdl); + std::cout << user_id << " 已断开连接" << std::endl; + } + }); + + mlpServer.set_message_handler( + bind(&MLPMess::on_server_message, this, _1, _2)); + mlpServer.listen(port); + mlpServer.set_reuse_addr(true); + mlpServer.start_accept(); + + /// 客户端初始化 + // 禁用客户端日志 + rmsClient.set_access_channels(websocketpp::log::alevel::all); + rmsClient.clear_access_channels(websocketpp::log::alevel::all); + // 初始化服务器并绑定ioService + rmsClient.init_asio(); + // 设置服务器的消息回调 + rmsClient.set_open_handler([this, name](websocketpp::connection_hdl hdl) { + rmsClient.send(hdl, name, websocketpp::frame::opcode::text); + }); + rmsClient.set_message_handler( + bind(&MLPMess::on_rmsClient_message, this, _1, _2)); + // 启动连接 + websocketpp::lib::error_code rmsec; + auto rmscon = rmsClient.get_connection(uri0, rmsec); + if (rmsec) { + std::cerr << "连接RMSClient错误: " << rmsec.message() << std::endl; + // 处理错误,例如重试或退出 + } else { + rmsClient.connect(rmscon); + } + + // 禁用客户端日志 + rmsClient0.set_access_channels(websocketpp::log::alevel::all); + rmsClient0.clear_access_channels(websocketpp::log::alevel::all); + // 初始化服务器并绑定ioService + rmsClient0.init_asio(); + // 设置服务器的消息回调 + rmsClient0.set_open_handler([this, name](websocketpp::connection_hdl hdl) { + rmsClient0.send(hdl, name, websocketpp::frame::opcode::text); + }); + rmsClient0.set_message_handler( + bind(&MLPMess::on_rmsClient0_message, this, _1, _2)); + // 启动连接 + websocketpp::lib::error_code rmsec0; + auto rmscon0 = rmsClient0.get_connection(uri1, rmsec0); + if (rmsec0) { + std::cerr << "连接RMSClient0错误: " << rmsec0.message() << std::endl; + } else { + rmsClient0.connect(rmscon0); + } + rmsClient.set_open_handshake_timeout(5000); // 5秒超时 + rmsClient0.set_open_handshake_timeout(5000); + } + + 
void run() { + std::thread rmsClient_thread([this]() { rmsClient.run(); }); + std::thread rmsClient0_thread([this]() { rmsClient0.run(); }); + // 启动 WebSocket 服务器线程 + std::thread server_thread([this]() { mlpServer.run(); }); + + // 新增:启动输出监听线程,向AddMess发送数据 + std::thread output_thread([this]() { + while (true) { + resultContainer = sharedQueue.pop_output(); + std::lock_guard lock(symbolMutex); // 加锁保护符号表 + MemRef subResultContainer0({SubMaxTokenLength, HiddenSize}); + MemRef subResultContainer1({SubMaxTokenLength, HiddenSize}); + resultContainer.splitMemRef(std::move(resultContainer), + subResultContainer0, subResultContainer1, 0, + 20); + auto it = hdlsSymbol.find("AddMess0"); + if (it != hdlsSymbol.end()) { + send_data(hdlsSymbol["AddMess0"], dataId++, + {subResultContainer0.getDataVector()}); + send_data(hdlsSymbol["AddMess1"], dataId++, + {subResultContainer1.getDataVector()}); + std::cout << "转发成功." << std::endl; + } else { + std::cout << "AddMess0未连接, 丢弃结果." << std::endl; + } + } + }); + rmsClient_thread.join(); + rmsClient0_thread.join(); + server_thread.join(); + output_thread.join(); + } + +private: + server mlpServer; + client rmsClient; + client rmsClient0; + const std::string name; + SharedQueue &sharedQueue; + std::map hdlsSymbol; + std::map> + connections; + std::mutex symbolMutex; // 保护 hdlsSymbol 的互斥锁 + MemRef resultContainer; + // 确保对dataId的操作是​​原子​​的 + std::atomic dataId; + std::mutex dataMutex; + + void send_data(websocketpp::connection_hdl hdl, uint32_t dataId, + const std::vector> &data) { + const uint8_t total = data.size(); + + auto con = mlpServer.get_con_from_hdl(hdl); + if (!con || con->get_state() != websocketpp::session::state::open) { + std::cerr << "连接已关闭,无法发送数据。" << std::endl; + return; + } + + for (uint8_t i = 0; i < total; ++i) { + const auto &subdata = data[i]; + + // 构造协议头 + std::vector packet(10); // 4+1+1+2=8字节头 + memcpy(packet.data(), &dataId, 4); + packet[4] = total; + packet[5] = i; + uint32_t num = subdata.size(); + 
memcpy(packet.data() + 6, &num, 4); + + // 添加浮点数据 + const uint8_t *binaryData = + reinterpret_cast(subdata.data()); + packet.insert(packet.end(), binaryData, + binaryData + subdata.size() * sizeof(float)); + + mlpServer.send(hdl, packet.data(), packet.size(), + websocketpp::frame::opcode::binary); + } + } + + void on_server_message(websocketpp::connection_hdl hdl, + server::message_ptr msg) { + std::string payload = msg->get_payload(); + if (payload.find("AddMess") != std::string::npos) { + std::lock_guard lock(symbolMutex); // 加锁保护符号表 + hdlsSymbol[payload] = hdl; + connections[hdl] = payload; + std::cout << payload << " 已连接" << std::endl; + return; + } + } + + std::vector getFloatData(client::message_ptr msg) { + if (msg->get_opcode() != websocketpp::frame::opcode::binary) { + std::cout << "忽略非二进制消息" << std::endl; + return {}; + } + + const std::string &payload = msg->get_payload(); + if (payload.size() < 10) { + std::cerr << "错误: 协议头不完整(需要至少10字节)" << std::endl; + return {}; + } + + // 解析协议头 + uint32_t batch_id; + uint8_t totalChunks, seqChunk; + uint32_t num_elements; + + memcpy(&batch_id, payload.data(), 4); + totalChunks = payload[4]; + seqChunk = payload[5]; + memcpy(&num_elements, payload.data() + 6, 4); + + // 验证分块序号有效性 + if (seqChunk >= totalChunks) { + std::cerr << "错误:非法分块序号 " << (int)seqChunk + << " (总块数=" << (int)totalChunks << ")" << std::endl; + return {}; + } + + // 验证数据长度 + const size_t expectedSize = 10 + num_elements * sizeof(float); + if (payload.size() != expectedSize) { + std::cerr << "错误:数据长度不匹配(预期=" << expectedSize + << " 实际=" << payload.size() << ")" << std::endl; + return {}; + } + + // 提取浮点数据 + const float *float_data = + reinterpret_cast(payload.data() + 10); + std::vector chunk(float_data, float_data + num_elements); + return chunk; + } + + void on_rmsClient_message(websocketpp::connection_hdl, + client::message_ptr msg) { + std::lock_guard lock(dataMutex); + auto chunk = getFloatData(msg); + intptr_t sizes[3] = {1, SubMaxTokenLength, 
HiddenSize}; + MemRef subResultContainer(chunk.data(), sizes); + sharedQueue.push_input0(subResultContainer); + std::cout << "接收到RMSMess0数据." << std::endl; + } + + void on_rmsClient0_message(websocketpp::connection_hdl, + client::message_ptr msg) { + std::lock_guard lock(dataMutex); + auto chunk = getFloatData(msg); + intptr_t sizes[3] = {1, SubMaxTokenLength, HiddenSize}; + MemRef subResultContainer(chunk.data(), sizes); + sharedQueue.push_input1(subResultContainer); + std::cout << "接收到RMSMess1数据." << std::endl; + } +}; + +//--------------------- Comp (子线程) ------------------------------------------ +// -splitNum: 标志当前MLP模块是第几个子模块 +//------------------------------------------------------------------------------ +class Comp { +public: + Comp(SharedQueue &queue, const std::string splitNum = "0") : sharedQueue(queue), splitNum(splitNum) {} + + void init() { loadAllParameters(); } + + void run() { + while (true) { + MemRef rmsInput0 = sharedQueue.pop_input0(); + MemRef rmsInput1 = sharedQueue.pop_input1(); + MemRef input0({1, MaxTokenLength, HiddenSize}); + MemRef resultContainer({MaxTokenLength, HiddenSize}); + input0.concatenateMemRefs(rmsInput0, rmsInput1, input0, 1); + _mlir_ciface_forward5(&resultContainer, ¶msContainers[index], + &input0); + std::cout << "第" << index << "次forward5 computed." 
<< std::endl; + sharedQueue.push_output(resultContainer); + index = (index + 1) % 32; + } + } + +private: + SharedQueue &sharedQueue; + std::vector> paramsContainers; + uint32_t index = 0; + const std::string splitNum; + + void loadAllParameters() { + constexpr size_t paramSize_group[] = { + 131072064, 4096, 33554432, 0, 4096, 67633152, 0, 4096, 33554432, + 0, 4096, 67633152, 0, 4096, 33554432, 0, 4096, 67633152, + 0, 4096, 33554432, 0, 4096, 67633152, 0, 4096, 33554432, + 0, 4096, 67633152, 0, 4096, 33554432, 0, 4096, 67633152, + 0, 4096, 33554432, 0, 4096, 67633152, 0, 4096, 33554432, + 0, 4096, 67633152, 0, 4096, 33554432, 0, 4096, 67633152, + 0, 4096, 33554432, 0, 4096, 67633152, 0, 4096, 33554432, + 0, 4096, 67633152, 0, 4096, 33554432, 0, 4096, 67633152, + 0, 4096, 33554432, 0, 4096, 67633152, 0, 4096, 33554432, + 0, 4096, 67633152, 0, 4096, 33554432, 0, 4096, 67633152, + 0, 4096, 33554432, 0, 4096, 67633152, 0, 4096, 33554432, + 0, 4096, 67633152, 0, 4096, 33554432, 0, 4096, 67633152, + 0, 4096, 33554432, 0, 4096, 67633152, 0, 4096, 33554432, + 0, 4096, 67633152, 0, 4096, 33554432, 0, 4096, 67633152, + 0, 4096, 33554432, 0, 4096, 67633152, 0, 4096, 33554432, + 0, 4096, 67633152, 0, 4096, 33554432, 0, 4096, 67633152, + 0, 4096, 33554432, 0, 4096, 67633152, 0, 4096, 33554432, + 0, 4096, 67633152, 0, 4096, 33554432, 0, 4096, 67633152, + 0, 4096, 33554432, 0, 4096, 67633152, 0, 4096, 33554432, + 0, 4096, 67633152, 0, 4096, 33554432, 0, 4096, 67633152, + 0, 4096, 33554432, 0, 4096, 67633152, 0, 4096, 33554432, + 0, 4096, 67633152, 0, 131076096}; + size_t group_len = sizeof(paramSize_group) / sizeof(paramSize_group[0]); + BaseDisModel::getParameters(paramSize_group, group_len, 67633152, + splitNum, paramsContainers); + // /// Define directories of vacabulary and parameter file. 
+ // std::string llamaBuildDir = LLAMA_EXAMPLE_BUILD_PATH; + + // for (int i = 0; i < 194; i++) { // N 为需要生成的数量 + // if (paramSize_group[i] == 67633152) { + // std::string paramsDir = llamaBuildDir + "/subgraph" + + // std::to_string(i) + "_arg" + splitNum + ".data"; + // MemRef paramsContainer({paramSize_group[i]}); + // loadParameters(paramsDir, paramsContainer); + // paramsContainers.push_back(std::move(paramsContainer)); + // } + // } + } +}; + +#endif // LLAMAMLP_H diff --git a/examples/SplitLlama/llamaOutput.h b/examples/SplitLlama/llamaOutput.h new file mode 100644 index 00000000..c823bb72 --- /dev/null +++ b/examples/SplitLlama/llamaOutput.h @@ -0,0 +1,336 @@ +#ifndef LLAMAOUTPUT_H // 作用:防止llamaOutput.h被重复引用 +#define LLAMAOUTPUT_H +#include "BaseDisModel.h" +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +using namespace buddy; +using websocketpp::lib::bind; +using websocketpp::lib::placeholders::_1; +using websocketpp::lib::placeholders::_2; + +typedef websocketpp::server server; +typedef websocketpp::client client; + +constexpr size_t MaxVocabSize = 32000; +constexpr size_t MaxTokenLength = 40; +constexpr size_t SubMaxTokenLength = 20; +constexpr size_t HiddenSize = 4096; +constexpr size_t HiddenSize0 = 128; +constexpr size_t HiddenSize1 = 41; +constexpr size_t ParamSize = 131076096; + +/// Print [Log] label in bold blue format. +void printLogLabel() { std::cout << "\033[34;1m[Log] \033[0m"; } + +/// Declare LLaMA forward function. +extern "C" void _mlir_ciface_forward193(MemRef *, MemRef *, + MemRef *); + +/// Find the index of the max value. 
+int findMaxIndex(const float *start, const float *end) { + return std::distance(start, std::max_element(start, end)); +} + +// 共享内存结构(线程安全队列) +class SharedQueue { +public: + void push_input(const MemRef &data) { + std::lock_guard lock(input_mutex); + inputQueue.push(data); + input_cv.notify_one(); + } + + MemRef pop_input() { + std::unique_lock lock(input_mutex); + input_cv.wait(lock, [this] { return !inputQueue.empty(); }); + auto data = inputQueue.front(); + inputQueue.pop(); + return data; + } + + void push_input0(const MemRef &data) { + std::lock_guard lock(input0_mutex); + input0Queue.push(data); + input0_cv.notify_one(); + } + + MemRef pop_input0() { + std::unique_lock lock(input0_mutex); + input0_cv.wait(lock, [this] { return !input0Queue.empty(); }); + auto data = input0Queue.front(); + input0Queue.pop(); + return data; + } + + void push_output(const MemRef &data) { + std::lock_guard lock(output_mutex); + outputQueue.push(data); + output_cv.notify_one(); + } + + MemRef pop_output() { + std::unique_lock lock(output_mutex); + output_cv.wait(lock, [this] { return !outputQueue.empty(); }); + auto data = outputQueue.front(); + outputQueue.pop(); + return data; + } + +private: + std::queue> inputQueue; + std::queue> input0Queue; + std::queue> outputQueue; + std::mutex input_mutex; + std::mutex input0_mutex; + std::mutex output_mutex; + std::condition_variable input_cv; + std::condition_variable input0_cv; + std::condition_variable output_cv; +}; + +//--------------------- OutputMess (主线程) --------------------- +class OutputMess { +public: + OutputMess(SharedQueue &queue, const std::string &uri, + const std::string &uri0, const std::string &uri1) + : addClient(), addClient0(), inputClient(), sharedQueue(queue), + outputContainer(), inputHdl(), currentToken(), + resultContainer(MemRef({1, MaxTokenLength, HiddenSize})) { + /// 客户端初始化 + addClient.set_access_channels(websocketpp::log::alevel::none); + addClient.clear_access_channels(websocketpp::log::alevel::all); + 
addClient.init_asio(); + addClient.set_open_handler([this](websocketpp::connection_hdl hdl) { + addClient.send(hdl, "OutputMess", websocketpp::frame::opcode::text); + }); + addClient.set_message_handler( + bind(&OutputMess::on_addClient_message, this, _1, _2)); + websocketpp::lib::error_code addec; + auto addcon = addClient.get_connection(uri, addec); + addClient.connect(addcon); + + addClient0.set_access_channels(websocketpp::log::alevel::none); + addClient0.clear_access_channels(websocketpp::log::alevel::all); + addClient0.init_asio(); + addClient0.set_open_handler([this](websocketpp::connection_hdl hdl) { + addClient0.send(hdl, "OutputMess", websocketpp::frame::opcode::text); + }); + addClient0.set_message_handler( + bind(&OutputMess::on_addClient0_message, this, _1, _2)); + websocketpp::lib::error_code addec0; + auto addcon0 = addClient0.get_connection(uri0, addec0); + addClient0.connect(addcon0); + + inputClient.set_access_channels(websocketpp::log::alevel::none); + inputClient.clear_access_channels(websocketpp::log::alevel::all); + inputClient.init_asio(); + inputClient.set_open_handler([this](websocketpp::connection_hdl hdl) { + inputClient.send(hdl, "OutputMess", websocketpp::frame::opcode::text); + std::lock_guard lock(hdlMutex); // 加锁保护符号表 + inputHdl = hdl; + }); + inputClient.set_message_handler( + bind(&OutputMess::on_inputClient_message, this, _1, _2)); + websocketpp::lib::error_code inputec; + auto inputcon = inputClient.get_connection(uri1, inputec); + inputClient.connect(inputcon); + + outputContainer.loadVocab(vocabDir); + } + + void run() { + std::thread addClient_thread([this]() { addClient.run(); }); + std::thread addClient0_thread([this]() { addClient0.run(); }); + std::thread inputClient_thread([this]() { inputClient.run(); }); + + // 新增:启动输出监听线程,向InputMess发送数据 + std::thread output_thread([this]() { + while (true) { + resultContainer = sharedQueue.pop_output(); + std::lock_guard lock(hdlMutex); // 加锁保护符号表 + int tokenIndex = currentToken + 
tokenCnt - 1; + currentToken++; + // Determine the generated token. + const float *startPtr = + resultContainer.getData() + tokenIndex * MaxVocabSize; + const float *endPtr = startPtr + MaxVocabSize; + int maxIndex = findMaxIndex(startPtr, endPtr); + + // Stop if a separator token (2, ) or line break token (13 <0x0A>) + // is generated. + if (maxIndex == 2) { + break; + } + // Append the generated token into the input and output container. + outputContainer.appendTokenIdx(maxIndex); + + if (currentToken == (MaxTokenLength - tokenCnt)) { + std::cout << "\033[33;1m[Output]\033[0m " + << outputContainer.revertLlama() << std::endl; + currentToken = 0; + } else if (currentToken < (MaxTokenLength - tokenCnt)) { + inputClient.send(inputHdl, std::to_string(maxIndex), + websocketpp::frame::opcode::text); + std::cout << "第" << currentToken << "次Token推理完成." << std::endl; + } else { + std::cout << "Transformer层计算次数过多, 当前次数为: " << currentToken + << std::endl; + } + } + }); + addClient_thread.join(); + addClient0_thread.join(); + output_thread.join(); + } + +private: + client addClient; + client addClient0; + client inputClient; + SharedQueue &sharedQueue; + websocketpp::connection_hdl inputHdl; + std::mutex hdlMutex; // 保护 hdlsSymbol 的互斥锁 + MemRef resultContainer; + Text outputContainer; + // // 确保对dataId的操作是​​原子​​的 + // std::atomic dataId = 0; + std::mutex data_mutex; + // 记录已经进行过的llama推理次数 + uint32_t currentToken = 0; + // 记录输入的token数量 + uint32_t tokenCnt = 0; + /// Define directories of vacabulary and file. 
+ std::string llamaDir = LLAMA_SPLIT_EXAMPLE_PATH; + const std::string vocabDir = llamaDir + "/vocab.txt"; + + std::vector getFloatData(client::message_ptr msg) { + if (msg->get_opcode() != websocketpp::frame::opcode::binary) { + std::cout << "忽略非二进制消息" << std::endl; + return {}; + } + + const std::string &payload = msg->get_payload(); + if (payload.size() < 10) { + std::cerr << "错误: 协议头不完整(需要至少10字节)" << std::endl; + return {}; + } + + // 解析协议头 + uint32_t batch_id; + uint8_t totalChunks, seqChunk; + uint32_t num_elements; + + memcpy(&batch_id, payload.data(), 4); + totalChunks = payload[4]; + seqChunk = payload[5]; + memcpy(&num_elements, payload.data() + 6, 4); + + // 验证分块序号有效性 + if (seqChunk >= totalChunks) { + std::cerr << "错误:非法分块序号 " << (int)seqChunk + << " (总块数=" << (int)totalChunks << ")" << std::endl; + return {}; + } + + // 验证数据长度 + const size_t expectedSize = 10 + num_elements * sizeof(float); + if (payload.size() != expectedSize) { + std::cerr << "错误:数据长度不匹配(预期=" << expectedSize + << " 实际=" << payload.size() << ")" << std::endl; + return {}; + } + + // 提取浮点数据 + const float *float_data = + reinterpret_cast(payload.data() + 10); + std::vector chunk(float_data, float_data + num_elements); + return chunk; + } + + void on_addClient_message(websocketpp::connection_hdl hdl, + client::message_ptr msg) { + std::lock_guard lock(data_mutex); + auto chunk = getFloatData(msg); + intptr_t sizes[3] = {1, SubMaxTokenLength, HiddenSize}; + MemRef subResultContainer(chunk.data(), sizes); + sharedQueue.push_input(subResultContainer); + std::cout << "接收到AddMess0数据" << std::endl; + } + + void on_addClient0_message(websocketpp::connection_hdl hdl, + client::message_ptr msg) { + std::lock_guard lock(data_mutex); + auto chunk = getFloatData(msg); + intptr_t sizes[3] = {1, SubMaxTokenLength, HiddenSize}; + MemRef subResultContainer(chunk.data(), sizes); + sharedQueue.push_input0(subResultContainer); + std::cout << "接收到AddMess1数据" << std::endl; + } + + void 
on_inputClient_message(websocketpp::connection_hdl hdl, + client::message_ptr msg) { + std::string payload = msg->get_payload(); + try { + if (payload.empty()) + throw std::invalid_argument("空输入"); + tokenCnt = std::stoi(payload); + } catch (const std::exception &e) { + std::cout << "无效输入: " << payload << " (" << e.what() << ")" + << std::endl; + } + } +}; + +//--------------------- Comp (子线程) --------------------- +class Comp { +public: + Comp(SharedQueue &queue) : sharedQueue(queue), paramsContainer({ParamSize}) {} + void init() { loadAllParameters(); } + void run() { + while (true) { + MemRef addInput = sharedQueue.pop_input(); + MemRef addInput0 = sharedQueue.pop_input0(); + MemRef input0({1, MaxTokenLength, HiddenSize}); + MemRef resultContainer({1, MaxTokenLength, HiddenSize}); + input0.concatenateMemRefs(addInput, addInput0, input0, 1); + _mlir_ciface_forward193(&resultContainer, ¶msContainer, &input0); + std::cout << "forward193 computed." << std::endl; + sharedQueue.push_output(resultContainer); + } + } + +private: + SharedQueue &sharedQueue; + MemRef paramsContainer; + + void loadAllParameters() { + /// Define directories of parameter file. 
+ std::string llamaBuildDir = LLAMA_EXAMPLE_BUILD_PATH; + std::string paramsDir = + llamaBuildDir + "/subgraph193_arg0.data"; // 权重文件路径 + BaseDisModel::loadParameters(paramsDir, paramsContainer); + } +}; + +#endif // LLAMAOUTPUT_H diff --git a/examples/SplitLlama/llamaRMS.h b/examples/SplitLlama/llamaRMS.h new file mode 100644 index 00000000..ae37a0af --- /dev/null +++ b/examples/SplitLlama/llamaRMS.h @@ -0,0 +1,394 @@ +#ifndef LLAMARMS_H // 作用:防止llamaRMS.h被重复引用 +#define LLAMARMS_H +#include +#include +#include +#include +#include "BaseDisModel.h" +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +using namespace buddy; +using websocketpp::lib::bind; +using websocketpp::lib::placeholders::_1; +using websocketpp::lib::placeholders::_2; + +typedef websocketpp::server server; +typedef websocketpp::client client; + +constexpr size_t MaxVocabSize = 32000; +constexpr size_t MaxTokenLength = 40; +constexpr size_t SubMaxTokenLength = 20; +constexpr size_t HiddenSize = 4096; +constexpr size_t HiddenSize0 = 128; +constexpr size_t HiddenSize1 = 41; + +/// Declare LLaMA forward function. 
+extern "C" void _mlir_ciface_forward1(MemRef *, MemRef *, + MemRef *); + +// 共享内存结构(线程安全队列) +class SharedQueue { +public: + void push_input(const std::any &data) { + std::lock_guard lock(inputMutex); + inputQueue.push(data); + input_cv.notify_one(); + } + + std::any pop_input() { + std::unique_lock lock(inputMutex); + input_cv.wait(lock, [this] { return !inputQueue.empty(); }); + auto data = inputQueue.front(); + inputQueue.pop(); + return data; + } + + void push_output(const std::any &data) { + std::lock_guard lock(outputMutex); + outputQueue.push(data); + output_cv.notify_one(); + } + + std::any pop_output() { + std::unique_lock lock(outputMutex); + output_cv.wait(lock, [this] { return !outputQueue.empty(); }); + auto data = outputQueue.front(); + outputQueue.pop(); + return data; + } + +private: + std::queue inputQueue; + std::queue outputQueue; + std::mutex inputMutex; + std::mutex outputMutex; + std::condition_variable input_cv; + std::condition_variable output_cv; +}; + +//--------------------- RMSMess (主线程) --------------------- +class RMSMess { +public: + RMSMess(const std::string name, SharedQueue &queue, const uint16_t &port, + const std::string &uri) + : rmsServer(), name(name), inputClient(), sharedQueue(queue), + hdlsSymbol(), + resultContainer(MemRef({1, SubMaxTokenLength, HiddenSize})), + dataId(0) { + /// 服务器初始化 + rmsServer.set_access_channels(websocketpp::log::alevel::none); + rmsServer.clear_access_channels(websocketpp::log::alevel::all); + rmsServer.init_asio(); + + // rmsServer.set_close_handler([this](websocketpp::connection_hdl hdl) { + // std::lock_guard lock(symbolMutex); // 加锁保护符号表 + // auto it = connections.find(hdl); + // if (it != connections.end()) { + // std::string user_id = it->second; + // hdlsSymbol.erase(user_id); + // connections.erase(hdl); + // } + // }); + + rmsServer.set_message_handler( + bind(&RMSMess::on_server_message, this, _1, _2)); + rmsServer.listen(port); + rmsServer.set_reuse_addr(true); + rmsServer.start_accept(); 
+ + /// 客户端初始化 + // 禁用客户端日志 + inputClient.set_access_channels(websocketpp::log::alevel::none); + inputClient.clear_access_channels(websocketpp::log::alevel::all); + // 初始化服务器并绑定ioService + inputClient.init_asio(); + // 设置服务器的消息回调 + inputClient.set_open_handler([this, name](websocketpp::connection_hdl hdl) { + inputClient.send(hdl, name, websocketpp::frame::opcode::text); + }); + inputClient.set_message_handler( + bind(&RMSMess::on_client_message, this, _1, _2)); + // 启动连接 + websocketpp::lib::error_code ec; + auto con = inputClient.get_connection(uri, ec); + inputClient.connect(con); + } + + void run() { + std::thread client_thread([this]() { inputClient.run(); }); + // 启动 WebSocket 服务器线程 + std::thread server_thread([this]() { rmsServer.run(); }); + + // 新增:启动输出监听线程,向MHAMess或MLPMess发送数据 + std::thread output_thread([this]() { + while (true) { + resultContainer = std::any_cast>(sharedQueue.pop_output()); + std::lock_guard lock(symbolMutex); // 加锁保护符号表 + if (hdlsSymbol.find("MHAMess0") != hdlsSymbol.end()) { + send_data(hdlsSymbol["MHAMess0"], dataId++, + {resultContainer.getDataVector()}); + send_data(hdlsSymbol["MHAMess1"], dataId++, + {resultContainer.getDataVector()}); + std::cout << name << "转发" << "MHAMess" << "成功" << std::endl; + } else if (hdlsSymbol.find("MLPMess0") != hdlsSymbol.end()) { + send_data(hdlsSymbol["MLPMess0"], dataId++, + {resultContainer.getDataVector()}); + send_data(hdlsSymbol["MLPMess1"], dataId++, + {resultContainer.getDataVector()}); + std::cout << name << "转发" << "MLPMess" << "成功" << std::endl; + } else { + std::cout << "MHAMess0或MLPMess0未连接, 丢弃结果: " << "result" + << std::endl; + } + } + }); + + client_thread.join(); + server_thread.join(); + output_thread.join(); + } + +private: + server rmsServer; + client inputClient; + const std::string name; + SharedQueue &sharedQueue; + std::map hdlsSymbol; + std::map> + connections; + websocketpp::connection_hdl lastAddHdl; + std::mutex symbolMutex; // 保护 hdlsSymbol 的互斥锁 + std::mutex sequenceMutex; 
+ MemRef resultContainer; + // 确保对dataId的操作是​​原子​​的 + std::atomic dataId; + std::mutex dataMutex; + // 存储每个参数的shape + std::vector inputSizes = {{1, SubMaxTokenLength, HiddenSize}}; + // 是否是第一个rms模块 + bool isFirst; + + void send_data(websocketpp::connection_hdl hdl, uint32_t dataId, + const std::vector> &data) { + const uint8_t total = data.size(); + + if (rmsServer.get_con_from_hdl(hdl)->get_state() != + websocketpp::session::state::open) + return; + + for (uint8_t i = 0; i < total; ++i) { + const auto &subdata = data[i]; + + // 构造协议头 + std::vector packet(10); // 4+1+1+2=8字节头 + memcpy(packet.data(), &dataId, 4); + packet[4] = total; + packet[5] = i; + uint32_t num = subdata.size(); + memcpy(packet.data() + 6, &num, 4); + + // 添加浮点数据 + const uint8_t *binaryData = + reinterpret_cast(subdata.data()); + packet.insert(packet.end(), binaryData, + binaryData + subdata.size() * sizeof(float)); + + rmsServer.send(hdl, packet.data(), packet.size(), + websocketpp::frame::opcode::binary); + } + } + + std::vector getFloatData(client::message_ptr msg) { + if (msg->get_opcode() != websocketpp::frame::opcode::binary) { + std::cout << "忽略非二进制消息" << std::endl; + return {}; + } + + const std::string &payload = msg->get_payload(); + if (payload.size() < 10) { + std::cerr << "错误: 协议头不完整(需要至少10字节)" << std::endl; + return {}; + } + + // 解析协议头 + uint32_t batch_id; + uint8_t totalChunks, seqChunk; + uint32_t num_elements; + + memcpy(&batch_id, payload.data(), 4); + totalChunks = payload[4]; + seqChunk = payload[5]; + memcpy(&num_elements, payload.data() + 6, 4); + + // 验证分块序号有效性 + if (seqChunk >= totalChunks) { + std::cerr << "错误:非法分块序号 " << (int)seqChunk + << " (总块数=" << (int)totalChunks << ")" << std::endl; + return {}; + } + + // 验证数据长度 + const size_t expectedSize = 10 + num_elements * sizeof(float); + if (payload.size() != expectedSize) { + std::cerr << "错误:数据长度不匹配(预期=" << expectedSize + << " 实际=" << payload.size() << ")" << std::endl; + return {}; + } + + // 提取浮点数据 + const float 
*float_data = + reinterpret_cast(payload.data() + 10); + std::vector chunk(float_data, float_data + num_elements); + return chunk; + } + + void on_server_message(websocketpp::connection_hdl hdl, + server::message_ptr msg) { + if (msg->get_opcode() == websocketpp::frame::opcode::text) { + std::string payload = msg->get_payload(); + if (payload.find("AddMess") != std::string::npos) { + std::lock_guard lock(symbolMutex); // 加锁保护符号表 + hdlsSymbol["AddMess"] = hdl; + connections[hdl] = payload; + std::cout << payload << "已连接" << std::endl; + } else if (payload.find("MHAMess") != std::string::npos || + payload.find("MLPMess") != std::string::npos) { + std::lock_guard lock(symbolMutex); // 加锁保护符号表 + hdlsSymbol[payload] = hdl; + connections[hdl] = payload; + std::cout << payload << "已连接" << std::endl; + } else if (payload.find("LastAdd") != std::string::npos) { + std::lock_guard lock(symbolMutex); // 加锁保护符号表 + connections[hdl] = payload; + std::cout << payload << "已连接" << std::endl; + } + return; + } + + if (msg->get_opcode() == websocketpp::frame::opcode::binary) { + auto chunk = getFloatData(msg); + intptr_t sizes[3] = {1, SubMaxTokenLength, HiddenSize}; + MemRef subResultContainer(chunk.data(), sizes); + + std::cout << "接收到AddMess数据" << std::endl; + { + std::lock_guard lockMutex(symbolMutex); // 加锁保护符号表 + auto it = hdlsSymbol.find("AddMess"); + if (it != hdlsSymbol.end()) { + send_data(hdlsSymbol["AddMess"], dataId++, + {subResultContainer.getDataVector()}); + std::cout << name << "转发AddMess成功." << std::endl; + } else { + std::cout << "AddMess未连接, 丢弃结果." 
<< std::endl; + } + } + sharedQueue.push_input(subResultContainer); + } + } + + void on_client_message(websocketpp::connection_hdl hdl, + client::message_ptr msg) { + auto chunk = getFloatData(msg); + intptr_t sizes[3] = {1, SubMaxTokenLength, HiddenSize}; + MemRef subResultContainer(chunk.data(), sizes); + + std::cout << "接收到InputMess数据" << std::endl; + { + std::lock_guard lockMutex(symbolMutex); // 加锁保护符号表 + auto it = hdlsSymbol.find("AddMess"); + if (it != hdlsSymbol.end()) { + send_data(hdlsSymbol["AddMess"], dataId++, + {subResultContainer.getDataVector()}); + std::cout << name << "转发AddMess成功." << std::endl; + } else { + std::cout << "AddMess未连接, 丢弃结果." << std::endl; + } + } + sharedQueue.push_input(subResultContainer); + } +}; + +//----------------------Comp (子线程)---------------------------------- +// -rmsNum: 标志当前RMS模块是第几组RMS模块(1标志前一组模块,0标志后一组模块) +//--------------------------------------------------------------------- +class Comp { +public: + Comp(SharedQueue &queue, const int rmsNum) : sharedQueue(queue), rmsNum(rmsNum) {} + + void init() {loadAllParameters();} + + void run() { + while (true) { + auto input = std::any_cast>(sharedQueue.pop_input()); + MemRef resultContainer({1, SubMaxTokenLength, HiddenSize}); + _mlir_ciface_forward1(&resultContainer, ¶msContainers[index], &input); + std::cout << "第" << index << "次forward1 computed." 
<< std::endl; + sharedQueue.push_output(resultContainer); + index = (index + 1) % 32; + } + } + +private: + SharedQueue &sharedQueue; + std::vector> paramsContainers; + uint32_t index = 0; + const int rmsNum; + + void loadAllParameters(){ + constexpr size_t paramSize_group[] = { + 131072064, 4096, 33554432, 0, 4096, 67633152, 0, 4096, 33554432, + 0, 4096, 67633152, 0, 4096, 33554432, 0, 4096, 67633152, + 0, 4096, 33554432, 0, 4096, 67633152, 0, 4096, 33554432, + 0, 4096, 67633152, 0, 4096, 33554432, 0, 4096, 67633152, + 0, 4096, 33554432, 0, 4096, 67633152, 0, 4096, 33554432, + 0, 4096, 67633152, 0, 4096, 33554432, 0, 4096, 67633152, + 0, 4096, 33554432, 0, 4096, 67633152, 0, 4096, 33554432, + 0, 4096, 67633152, 0, 4096, 33554432, 0, 4096, 67633152, + 0, 4096, 33554432, 0, 4096, 67633152, 0, 4096, 33554432, + 0, 4096, 67633152, 0, 4096, 33554432, 0, 4096, 67633152, + 0, 4096, 33554432, 0, 4096, 67633152, 0, 4096, 33554432, + 0, 4096, 67633152, 0, 4096, 33554432, 0, 4096, 67633152, + 0, 4096, 33554432, 0, 4096, 67633152, 0, 4096, 33554432, + 0, 4096, 67633152, 0, 4096, 33554432, 0, 4096, 67633152, + 0, 4096, 33554432, 0, 4096, 67633152, 0, 4096, 33554432, + 0, 4096, 67633152, 0, 4096, 33554432, 0, 4096, 67633152, + 0, 4096, 33554432, 0, 4096, 67633152, 0, 4096, 33554432, + 0, 4096, 67633152, 0, 4096, 33554432, 0, 4096, 67633152, + 0, 4096, 33554432, 0, 4096, 67633152, 0, 4096, 33554432, + 0, 4096, 67633152, 0, 4096, 33554432, 0, 4096, 67633152, + 0, 4096, 33554432, 0, 4096, 67633152, 0, 4096, 33554432, + 0, 4096, 67633152, 0, 131076096}; + /// Define directories of vacabulary and parameter file. 
+ std::string llamaBuildDir = LLAMA_EXAMPLE_BUILD_PATH; + + for (int i = 0; i < 194; i++) { // N 为需要生成的数量 + if (paramSize_group[i] == 4096 && i % 2 == rmsNum) { + std::string paramsDir = + llamaBuildDir + "/subgraph" + std::to_string(i) + "_arg0.data"; + MemRef paramsContainer({paramSize_group[i]}); + BaseDisModel::loadParameters(paramsDir, paramsContainer); + paramsContainers.push_back(std::move(paramsContainer)); + } + } + } +}; + +#endif // LLAMARMS_H diff --git a/examples/SplitLlama/main.py b/examples/SplitLlama/main.py new file mode 100644 index 00000000..28bb69ea --- /dev/null +++ b/examples/SplitLlama/main.py @@ -0,0 +1,105 @@ +import subprocess +import os +import signal +import sys +import time +from typing import List, Tuple + +process_list = [ + ['/home/chenweiwei/rvv/buddy-mlir/build/bin/buddy-llama-input-run'], # 第一个启动的输入进程 + ['/home/chenweiwei/rvv/buddy-mlir/build/bin/buddy-llama-rms-run'], + ['/home/chenweiwei/rvv/buddy-mlir/build/bin/buddy-llama-rms-0-run'], + ['/home/chenweiwei/rvv/buddy-mlir/build/bin/buddy-llama-mha-run'], + ['/home/chenweiwei/rvv/buddy-mlir/build/bin/buddy-llama-mha-0-run'], + ['/home/chenweiwei/rvv/buddy-mlir/build/bin/buddy-llama-add-run'], + ['/home/chenweiwei/rvv/buddy-mlir/build/bin/buddy-llama-add-0-run'], + ['/home/chenweiwei/rvv/buddy-mlir/build/bin/buddy-llama-rms-mlp-run'], + ['/home/chenweiwei/rvv/buddy-mlir/build/bin/buddy-llama-rms-mlp-0-run'], + ['/home/chenweiwei/rvv/buddy-mlir/build/bin/buddy-llama-mlp-run'], + ['/home/chenweiwei/rvv/buddy-mlir/build/bin/buddy-llama-mlp-0-run'], + ['/home/chenweiwei/rvv/buddy-mlir/build/bin/buddy-llama-add-mlp-run'], + ['/home/chenweiwei/rvv/buddy-mlir/build/bin/buddy-llama-add-mlp-0-run'], + ['/home/chenweiwei/rvv/buddy-mlir/build/bin/buddy-llama-output-run'] +] + +class ProcessController: + def __init__(self): + self.running_procs: List[Tuple[subprocess.Popen, str]] = [] + signal.signal(signal.SIGINT, self.signal_handler) + signal.signal(signal.SIGTERM, self.signal_handler) + + 
def generate_log_path(self, cmd: List[str]) -> str: + """生成带时间戳的日志路径[6](@ref)""" + base_name = os.path.basename(cmd[0]) + timestamp = time.strftime("%Y%m%d-%H%M%S") + return f"logs/{base_name}-{timestamp}.log" + + def signal_handler(self, signum: int, frame: any) -> None: + """终止所有进程[7](@ref)""" + print("\n接收到终止信号,正在清理进程...") + self.terminate_all() + sys.exit(1) + + def start_process(self, cmd: List[str]) -> subprocess.Popen: + """启动单个进程并重定向输出[4](@ref)""" + log_path = self.generate_log_path(cmd) + os.makedirs(os.path.dirname(log_path), exist_ok=True) + + with open(log_path, 'w') as log_file: + proc = subprocess.Popen( + cmd, + stdin=subprocess.PIPE if cmd == process_list[0] else None, + stdout=log_file, + stderr=subprocess.STDOUT, + text=True + ) + self.running_procs.append((proc, log_path)) + print(f"已启动进程: {' '.join(cmd)} [PID:{proc.pid}] 日志: {log_path}") + return proc + + def terminate_all(self) -> None: + """终止所有运行中的进程[8](@ref)""" + for proc, log_path in self.running_procs: + try: + if proc.poll() is None: + proc.terminate() + proc.wait(timeout=3) + print(f"已终止进程 [PID:{proc.pid}]") + except Exception as e: + print(f"终止进程错误: {str(e)}") + + def execute_sequence(self) -> None: + """执行主流程控制[2](@ref)""" + try: + # 第一阶段:启动输入进程(保持stdin管道) + input_proc = self.start_process(process_list[0]) + + # 第二阶段:顺序执行其他进程 + for cmd in process_list[1:]: + proc = self.start_process(cmd) + + # 阻塞等待完成[1](@ref) + while proc.poll() is None: + try: + proc.wait(timeout=0.5) + if proc.returncode != 0: + raise RuntimeError(f"进程异常退出: {cmd[0]} (代码:{proc.returncode})") + except subprocess.TimeoutExpired: + continue + + print(f"进程完成: {cmd[0]}") + + # 第三阶段:所有进程完成后处理输入 + if input_proc.poll() is None: + user_input = input("\n所有进程执行完毕,请输入数据:") + input_proc.communicate(input=user_input + "\n") + print("输入处理完成") + + except Exception as e: + print(f"执行错误: {str(e)}") + self.terminate_all() + sys.exit(1) + +if __name__ == "__main__": + controller = ProcessController() + controller.execute_sequence() diff 
--git a/frontend/Interfaces/buddy/Core/Container.h b/frontend/Interfaces/buddy/Core/Container.h index 6e3ff18d..d46ef68c 100644 --- a/frontend/Interfaces/buddy/Core/Container.h +++ b/frontend/Interfaces/buddy/Core/Container.h @@ -46,6 +46,7 @@ template class MemRef { MemRef(std::vector sizes, bool needMalloc, intptr_t offset); // Constructor from data. MemRef(const T *data, intptr_t sizes[N], intptr_t offset = 0); + MemRef(const T *data, std::vector sizes, intptr_t offset = 0); // Constructor from a unique_ptr, taking over. MemRef(std::unique_ptr &uptr, intptr_t sizes[N], intptr_t offset = 0); // Copy constructor. @@ -58,8 +59,16 @@ template class MemRef { MemRef &operator=(MemRef &&other) noexcept; // Desctrutor. ~MemRef(); + // Concat two MemRefs into a MemRef. + void concatenateMemRefs(MemRef &other0, MemRef &other1, MemRef &other2, size_t concatDim); + // Split a MemRef into two MemRefs. + void splitMemRef(MemRef &&other0, MemRef &other1, MemRef &other2, size_t splitDim, size_t splitIndex); + // Add two MemRef + void addMemRef(MemRef& a, MemRef& b); // Get the data pointer. T *getData(); + // Get the data. + std::vector getDataVector(); // Get the sizes (shape). const intptr_t *getSizes() { return sizes; } // Get the strides. @@ -184,6 +193,24 @@ MemRef::MemRef(const T *data, intptr_t sizes[N], intptr_t offset) { } } +template +MemRef::MemRef(const T *data, std::vector sizes, intptr_t offset) { + if (sizes.size() != N) { + throw std::runtime_error("Invalid number of dimensions."); + } + this->offset = offset; + for (size_t i = 0; i < N; i++) { + this->sizes[i] = sizes[i]; + } + setStrides(); + size_t size = product(this->sizes); + allocated = (T *)malloc(sizeof(T) * size); + aligned = allocated; + for (size_t i = 0; i < size; i++) { + aligned[i] = data[i]; + } +} + // Copy Constructor. // This constructor is used to initialize a MemRef object with another MemRef // object. 
@@ -354,4 +381,151 @@ template T *MemRef::release() { return temp; } +template +void MemRef::concatenateMemRefs(MemRef &other0, MemRef &other1, + MemRef &other2, size_t concatDim) { + // 检查拼接维度是否合法 + if (concatDim >= N) { + throw std::runtime_error("Invalid concatenation dimension."); + } + + // 检查非拼接维度的形状是否一致 + for (size_t i = 0; i < N; i++) { + if (i != concatDim && other0.getSizes()[i] != other1.getSizes()[i]) { + throw std::runtime_error("Shapes are not compatible for concatenation."); + } + } + + // 设置 other2 的尺寸 + intptr_t *sizes2 = const_cast(other2.getSizes()); + for (size_t i = 0; i < N; i++) { + if (i == concatDim) { + sizes2[i] = other0.getSizes()[i] + other1.getSizes()[i]; + } else { + sizes2[i] = other0.getSizes()[i]; + } + } + + // 设置 other2 的 strides + other2.setStrides(); + + // 计算拼接维度的偏移量 + intptr_t concatOffset0 = other0.getStrides()[concatDim] * other0.getSizes()[concatDim]; + intptr_t concatOffset1 = other1.getStrides()[concatDim] * other1.getSizes()[concatDim]; + + // 将 other0 的数据复制到 other2 + size_t offset0 = 0; + size_t offset1 = 0; + size_t offset2 = 0; + while (offset2 < other2.getSize()) { + size_t tmp = 0; + while (tmp < concatOffset0) { + other2[offset2 + tmp] = other0[offset0 + tmp]; + tmp++; + } + offset0 += concatOffset0; + offset2 += concatOffset0; + tmp = 0; + while (tmp < concatOffset1) { + other2[offset2 + tmp] = other1[offset1 + tmp]; + tmp++; + } + offset1 += concatOffset1; + offset2 += concatOffset1; + } + + // // 将 other1 的数据复制到 other2 + // for (size_t i = 0; i < other1.getSize(); i++) { + // // 计算目标索引 + // intptr_t targetIndex = i; + // intptr_t stride = 1; + // for (size_t dim = N; dim-- > 0;) { + // intptr_t coord = (i / stride) % other1.getSizes()[dim]; + // if (dim == concatDim) { + // coord += concatOffset; + // } + // targetIndex += coord * other2.getStrides()[dim]; + // stride *= other1.getSizes()[dim]; + // } + // std::cout << "targetIndex: " << targetIndex << std::endl; + // other2[targetIndex] = other1[i]; + // } +} 
+ +template +void MemRef::splitMemRef(MemRef &&other0, MemRef &other1, + MemRef &other2, size_t splitDim, size_t splitIndex) { + // 检查分割维度是否合法 + if (splitDim >= N) { + throw std::runtime_error("Invalid split dimension."); + } + + // 检查分割点是否合法 + if (splitIndex > static_cast(other0.getSizes()[splitDim])) { + throw std::runtime_error("Split index out of bounds."); + } + + // 设置 other1 的尺寸 + intptr_t *sizes1 = const_cast(other1.getSizes()); + const intptr_t *sizes0 = other0.getSizes(); + for (size_t i = 0; i < N; i++) { + sizes1[i] = sizes0[i]; + } + sizes1[splitDim] = splitIndex; + + // 设置 other2 的尺寸 + intptr_t *sizes2 = const_cast(other2.getSizes()); + for (size_t i = 0; i < N; i++) { + sizes2[i] = sizes0[i]; + } + sizes2[splitDim] = sizes0[splitDim] - splitIndex; + + // 设置 strides + other1.setStrides(); + other2.setStrides(); + + // 移动 other0 的资源到 other1 + other1.allocated = other0.allocated; + other1.aligned = other0.aligned; + other1.offset = other0.offset; + + // 设置 other2 的 aligned 和 offset + other2.allocated = nullptr; + other2.aligned = other0.aligned + other1.getSize(); + other2.offset = other0.offset; + + // 清空 other0 的资源 + other0.allocated = nullptr; + other0.aligned = nullptr; +} + +/// Add two MemRef +template +void MemRef::addMemRef(MemRef& a, MemRef& b) { + // 校验输入输出维度一致 + const intptr_t * aSizes = a.getSizes(); + const intptr_t * bSizes = b.getSizes(); + for (size_t i = 0; i < N; i++) { + if (aSizes[i] != bSizes[i]) { + throw std::runtime_error("Shapes are not compatible for concatenation."); + } + } + + for (size_t i = 0; i < a.getSize(); i++) { + a[i] += b[i]; + } +} + +/// Get the data. 
+template +std::vector MemRef::getDataVector() { + size_t size = product(this->sizes); + assert((size > 0) && "Invalid container data size."); + std::vector dataVector(size); + for (size_t i = 0; i < size; i++) { + dataVector[i] = aligned[i + offset]; + } + return dataVector; +} + #endif // FRONTEND_INTERFACES_BUDDY_CORE_CONTAINER diff --git a/frontend/Python/graph/graph.py b/frontend/Python/graph/graph.py index 9a642452..8c5c11e8 100644 --- a/frontend/Python/graph/graph.py +++ b/frontend/Python/graph/graph.py @@ -23,6 +23,7 @@ import ctypes import functools import numpy as np +import torch import mlir.ir as ir import mlir.dialects.func as func @@ -123,6 +124,7 @@ def __init__( """ self._body = [] self._inputs = inputs + self._outputs = None self.node_table: Dict[str, Op] = {} self._fake_params = fake_params self.device = device @@ -134,8 +136,7 @@ def __init__( self._output_memref = None self._output_descriptor = None self.execution_engine = None - self.op_groups: Dict[str, List[Op]] = {} - self.group_map_device: Dict[str, DeviceType] = {} + self.paral_group: Dict[str, List[int]] = {} @property def body(self): @@ -236,46 +237,12 @@ def displace_node(self, node: Op, newnode: Op): self._body[self._body.index(node)] = newnode self.node_table.pop(node.name) self.node_table[newnode.name] = newnode - - def init_op_group(self): - """ - Initializes operation groups within the graph. 
- - Returns: - - None - """ - submodel_count = 0 - tsf_count = 0 - for i, op in enumerate(self._body): - if isinstance(op, PlaceholderOp) or isinstance(op, OutputOp): - continue - - if "subgraph{}".format(submodel_count) not in self.op_groups.keys(): - group = [op] - subgraph_name = "subgraph{}".format(submodel_count) - self.group_map_device[subgraph_name] = DeviceType.CPU - self.op_groups[subgraph_name] = group - continue - - # todo: Added handling of more complex embedding cases - - if isinstance(op, PowOp): - if tsf_count%2 == 0: - submodel_count += 1 - tsf_count += 1 - group = [op] - subgraph_name = "subgraph{}".format(submodel_count) - self.group_map_device[subgraph_name] = DeviceType.CPU - self.op_groups[subgraph_name] = group - continue - else: - tsf_count += 1 - - subgraph_name = "subgraph{}".format(submodel_count) - group = self.op_groups[subgraph_name] - group.append(op) - self.op_groups[subgraph_name] = group - + + # 检查两个列表对应位置的元素是否为同一类的实例 + def are_classes_compatible(self, list_a, list_b): + if len(list_a) != len(list_b): + return False + return all(type(a) is type(b) for a, b in zip(list_a, list_b)) def fuse_ops(self, pattern_list: List[FunctionType]): """ @@ -342,6 +309,7 @@ def lower_to_top_level_ir(self, num : int = 0): # if num == 1: # fx_importer.partition_symbol_table() outputs = fx_importer.get_output_nodes() + self._outputs = outputs self._output_memref = [] output_ranks = [] output_dtypes = [] @@ -507,7 +475,7 @@ def _str_to_mlir_dtype(self, dtype: str) -> ir.Type: case _: raise NotImplementedError(f"Unsupported dtype {dtype}") - def _pack_params(self) -> None: + def _pack_params(self) -> list: """ Packs parameters of the graph to one memref. 
@@ -535,6 +503,7 @@ def _pack_params(self) -> None: self._param_packs.append( ir.MemRefType.get([param_total_size], mlir_dtype) ) + def addsymbol(self) -> None: """ @@ -655,6 +624,8 @@ def generated_func(*args): ] self._symbol_table[("output", 0)] = returns elif isinstance(node, PlaceholderOp): + if node._newshape is not None: + node.tensor_meta['shape'] = torch.Size(list(node._newshape)) self._import_placeholder(node, args_list) elif isinstance(node, GetItemOp): self._symbol_table[(str(node.name), 0)] = ( diff --git a/frontend/Python/graph/graph_driver.py b/frontend/Python/graph/graph_driver.py index 7ba89162..2df1d6a9 100644 --- a/frontend/Python/graph/graph_driver.py +++ b/frontend/Python/graph/graph_driver.py @@ -20,13 +20,17 @@ # # ===--------------------------------------------------------------------------- +import os import functools import numpy as np from mlir import ir +import torch from collections import deque, defaultdict from .graph import Graph, GraphImporter, TensorMeta -from .operation import FuncOp, CallOp, PlaceholderOp, OutputOp, GetItemOp + +from .operation import * +from .type import * from .operation import * from .type import * @@ -46,8 +50,7 @@ class GraphDriver: - _subgraphs_outputs (dict): A dictionary mapping subgraph names to their output op's result. """ - - def __init__(self, graph: Graph, parallelism: int = 0) -> None: + def __init__(self, graph: Graph, parallelism: int = 2) -> None: """ Initialize the GraphDriver object with a given computational graph. 
@@ -60,20 +63,23 @@ def __init__(self, graph: Graph, parallelism: int = 0) -> None: """ self._graph = graph self._parallelism = parallelism - self._subgraph_dependencies = { - subgraph_name: set() - for subgraph_name in list(self._graph.op_groups.keys()) - } - self._call_table = {} + # 对原图的操作分组 + self.op_groups: Dict[str, List[Op]] = {} + self.group_map_device: Dict[str, DeviceType] = {} + self._subgraph_dependencies = {} + self._paral_op_shape: Dict[str, List[int]] = {} ( - self._subgraphs, self._subgraphs_inputs, self._subgraphs_outputs, + ) = self.get_split_strategy() + self._call_table = {} + ( + self._subgraphs ) = self.build_subgraph_by_group() self._maingraphs = {} self._modules = {} # 新增:子图参数索引表 {子图名: 参数索引列表} - self._subgraph_param_indices = defaultdict(list) + self._subgraph_param_info = defaultdict(dict) @property def subgraphs(self): @@ -91,32 +97,139 @@ def modules(self): def subgraph_param_indices(self): return list(self._subgraph_param_indices.values()) - def build_subgraph_by_group(self): + def _add_paral_op_shape(self, op_name, shape): + if op_name not in self._paral_op_shape.keys(): + # print(op_name, shape) + self._paral_op_shape[op_name] = shape + + def _normalize_binary_operator_shape(self, shp1, shp2): + """Normalize the shape of two input tensors according to the broadcasting + rule""" + shp1 = list(shp1) + shp2 = list(shp2) + while len(shp1) < len(shp2): + shp1.insert(0, 1) + while len(shp2) < len(shp1): + shp2.insert(0, 1) + + return shp1, shp2 + + def _infer_new_shape_with_neg_one(self, old_shape, new_shape): + total_size = 1 + for dim_siz in old_shape: + total_size *= dim_siz + + neg_one_cnt = 0 + rest_size = 1 + for dim_siz in new_shape: + if dim_siz == -1: + neg_one_cnt += 1 + continue + rest_size *= dim_siz + + if neg_one_cnt != 0: + if neg_one_cnt > 1 or total_size % rest_size != 0: + raise ValueError("Can not infer the new shape!") + infer_dim_size = total_size // rest_size + for i, _ in enumerate(new_shape): + if new_shape[i] == -1: 
+ new_shape[i] = infer_dim_size + return new_shape + + def get_pack_params_size(self, tensors_meta: list[TensorMeta]) -> int: + param_total_size = 0 + for tensor_meta in tensors_meta: + param_total_size += functools.reduce( + lambda x, y: x * y, list(tensor_meta.shape), 1 + ) + return param_total_size + def get_split_strategy(self): """ - Builds subgraphs from a given graph based on groups. + Group ops based on the computational graph in terms of subgraphs. + + Analyse the inputs and outputs of each subgraph. - Args: - - graph (Graph): The graph from which subgraphs are constructed. + Update the shape information of the nodes in each subgraph + associated with the weight matrix to be split. Returns: - - tuple: A tuple containing dictionaries of subgraphs, subgraph inputs, - and subgraph outputs. + - None """ + # 对计算图的op进行分组,分组策略为:以PowOp为间隔放在一个subgraph中,忽略PlaceholderOp和OutputOp + submodel_count = 0 + ops_count = [6, 50, 2, 6, 14, 2] + pow_count = 0 + tsf_count = 0 + for i, op in enumerate(self._graph._body): + if isinstance(op, PlaceholderOp) or isinstance(op, OutputOp): + continue + + if "subgraph{}".format(submodel_count) not in self.op_groups.keys(): + subgraph_name = "subgraph{}".format(submodel_count) + self.group_map_device[subgraph_name] = DeviceType.CPU + self.op_groups[subgraph_name] = [op] + continue + + # todo: Added handling of more complex embedding cases + if isinstance(op, PowOp): + pow_count += 1 + submodel_count += 1 + tsf_count = 1 + subgraph_name = "subgraph{}".format(submodel_count) + self.group_map_device[subgraph_name] = DeviceType.CPU + self.op_groups[subgraph_name] = [op] + continue + + if pow_count > 0 and pow_count < 65: + if tsf_count == ops_count[(submodel_count-1)%6]: + tsf_count = 1 + submodel_count += 1 + subgraph_name = "subgraph{}".format(submodel_count) + self.group_map_device[subgraph_name] = DeviceType.CPU + self.op_groups[subgraph_name] = [op] + continue + else: + tsf_count += 1 + + subgraph_name = 
"subgraph{}".format(submodel_count) + group = self.op_groups[subgraph_name] + group.append(op) + self.op_groups[subgraph_name] = group + + self._subgraph_dependencies = { + subgraph_name: set() + for subgraph_name in list(self.op_groups.keys()) + } # 识别每个子图的输入节点,并将这些输入节点存储在subgraphs_inputs中 # 每个子图的输入节点是那些不属于当前子图但与当前子图中的操作有依赖关系的节点。 subgraphs_inputs = {} + # 预置每个权重矩阵的分割位置 + paral_pos0 = [-1, -1, -1] + paral_pos1 = [[1, -1], [0, -1, 0, 0, -1, -1, -1, 1], [0, 1], [1, -1], [0, -1, 0, 1], [0, 1]] + # Identify inputs for each subgraph - for subgraph_name in self._graph.op_groups.keys(): + for i, subgraph_name in enumerate(self.op_groups.keys()): subgraphs_inputs[subgraph_name] = [] - for op in self._graph.op_groups[subgraph_name]: + if(i == 0 or i == 193): + paral_pos = paral_pos0 + else: + paral_pos = paral_pos1[(i-1)%6] + input_count = 0 + for op in self.op_groups[subgraph_name]: for parent in op._parents: - if ( - self._graph.node_table[parent] - not in self._graph.op_groups[subgraph_name] - ) and (parent not in subgraphs_inputs[subgraph_name]): - subgraphs_inputs[subgraph_name].append(parent) + op_parent = self._graph.node_table[parent] + if ( op_parent not in self.op_groups[subgraph_name] + ) and (op_parent not in subgraphs_inputs[subgraph_name]): + subgraphs_inputs[subgraph_name].append(op_parent) + op_parent_shape = list(op_parent.tensor_meta["shape"]) + pos = paral_pos[input_count] + input_count += 1 + if(pos != -1 and pos < len(op_parent_shape)): + op_parent_shape[pos] = op_parent_shape[pos] // self._parallelism + self._add_paral_op_shape(parent, op_parent_shape) + subgraphs_outputs = {} output_node = [] @@ -130,53 +243,177 @@ def build_subgraph_by_group(self): # 识别每个子图的输出节点,并建立子图之间的依赖关系。 # Identify outputs for each subgraph and build dependencies between subgraphs - for subgraph_name in self._graph.op_groups.keys(): + for subgraph_name in self.op_groups.keys(): subgraphs_outputs[subgraph_name] = [] - for op in self._graph.op_groups[subgraph_name]: + for op in 
self.op_groups[subgraph_name]: for key in subgraphs_inputs.keys(): - if op.name in subgraphs_inputs[key]: - if(op.name not in subgraphs_outputs[subgraph_name]): - subgraphs_outputs[subgraph_name].append(op.name) + if op in subgraphs_inputs[key]: + if(op not in subgraphs_outputs[subgraph_name]): + subgraphs_outputs[subgraph_name].append(op) self._subgraph_dependencies[subgraph_name].add(key) if (op.name in output_node) and ( - op.name not in subgraphs_outputs[subgraph_name] + op not in subgraphs_outputs[subgraph_name] ): - subgraphs_outputs[subgraph_name].append(op.name) + subgraphs_outputs[subgraph_name].append(op) + + # 更新每个子图中与需要拆分的权重矩阵相关的节点的shape信息 + for subgraph_name in self.op_groups.keys(): + new_shape = [] + for node in self.op_groups[subgraph_name]: + if isinstance(node, PermuteOp) and node != self.op_groups[subgraph_name][-1]: + if node.args[0] in self._paral_op_shape.keys(): + old_shape = self._paral_op_shape[node.args[0]] + new_shape = [old_shape[index] for index in node.args[1]] + if node != self.op_groups[subgraph_name][-1]: + self._add_paral_op_shape(node.name, new_shape) + elif isinstance(node, MatmulOp) and node != self.op_groups[subgraph_name][-1]: + assert len(node.args) == 2 + # 由于MatmulOp的输入参数是其他op的结果,所以无法通过两个参数的shape来预测出结果shape + for op_arg in node.args: + if op_arg in self._paral_op_shape.keys(): + if(node.args[0] in self._paral_op_shape.keys()): + input1_shape = self._paral_op_shape[node.args[0]] + else: + input1 = self._graph.node_table[node.args[0]] + input1_shape = list(input1.tensor_meta["shape"]) + if(node.args[1] in self._paral_op_shape.keys()): + input2_shape = self._paral_op_shape[node.args[1]] + else: + input2 = self._graph.node_table[node.args[1]] + input2_shape = list(input2.tensor_meta["shape"]) + new_shape = input1_shape + new_shape[-1] = input2_shape[-1] + self._add_paral_op_shape(node.name, new_shape) + break + elif isinstance(node, AddOp | SubOp | MulOp | DivOp) and node != self.op_groups[subgraph_name][-1]: + assert 
len(node.args) == 2 + # 由于MatmulOp的输入参数是其他op的结果,所以无法通过两个参数的shape来预测出结果shape + for i, op_arg in enumerate(node.args): + if op_arg in self._paral_op_shape.keys(): + broadcasted_result_shp = [] + if isinstance(node.args[1-i], int | float): + broadcasted_result_shp = self._paral_op_shape[op_arg] + else: + if(node.args[0] in self._paral_op_shape.keys()): + input1_shape = self._paral_op_shape[node.args[0]] + else: + input1 = self._graph.node_table[node.args[0]] + input1_shape = list(input1.tensor_meta["shape"]) + if(node.args[1] in self._paral_op_shape.keys()): + input2_shape = self._paral_op_shape[node.args[1]] + else: + input2 = self._graph.node_table[node.args[1]] + input2_shape = list(input2.tensor_meta["shape"]) + norm_input1_shape, norm_input2_shape = self._normalize_binary_operator_shape( + input1_shape, input2_shape + ) + for dim1, dim2 in zip(norm_input1_shape, norm_input2_shape): + broadcasted_result_shp.append(max(dim1, dim2)) + self._add_paral_op_shape(node.name, broadcasted_result_shp) + del i + elif isinstance(node, ViewOp) and node != self.op_groups[subgraph_name][-1]: + if node.args[0] in self._paral_op_shape.keys(): + old_shape = self._paral_op_shape[node.args[0]] + old_len = len(old_shape) + tmp_old_shape = [] + for i in range(old_len): + if old_shape[i] != 1: + tmp_old_shape.append(old_shape[i]) + new_shape = list(node.args[1]) + new_len = len(new_shape) + tmp_new_shape = [] + for i in range(new_len): + if new_shape[i] != 1: + tmp_new_shape.append(new_shape[i]) + if len(tmp_old_shape) == len(tmp_new_shape): + # todo: 待优化,当前处理方式只考虑 <--> <1xMxNx...>的情况 + if old_len < new_len: + for i in range(old_len): + new_shape[i+1] = old_shape[i] + else: + for i in range(new_len): + new_shape[i] = old_shape[i+1] + else: + # todo: 待优化,当前处理方式只考虑<...xMxN> <--> <...xMxYxZ>(其中N=YxZ)的情况 + if old_len < new_len: + new_shape[-2] = old_shape[-1] // new_shape[-1] + else: + new_shape = self._infer_new_shape_with_neg_one(old_shape, new_shape) + self._add_paral_op_shape(node.name, 
new_shape) + elif isinstance(node, CatOp) and node != self.op_groups[subgraph_name][-1]: + for op_arg in node.args[0]: + op_arg = str(op_arg) + if op_arg in self._paral_op_shape.keys(): + new_shape = self._paral_op_shape[op_arg] + self._add_paral_op_shape(node.name, new_shape) + break + else: + if node != self.op_groups[subgraph_name][-1]: + # 默认不属于上述的操作的算子算子都与被切分的算子的shape相同 + for i, op_arg in enumerate(node.args): + if op_arg in self._paral_op_shape.keys(): + new_shape = self._paral_op_shape[op_arg] + self._add_paral_op_shape(node.name, new_shape) + break + + return subgraphs_inputs, subgraphs_outputs + + def build_subgraph_by_group(self): + """ + Builds subgraphs from a given graph based on groups. + + Args: + - graph (Graph): The graph from which subgraphs are constructed. + + Returns: + - tuple: A tuple containing dictionaries of subgraphs, subgraph inputs, + and subgraph outputs. + """ subgraphs = {} # Construct each subgraph - for subgraph_name in self._graph.op_groups.keys(): + for subgraph_name in self.op_groups.keys(): + # print(f"-----------------------subgraph{m}------------------------------") subgraph_input = [] - subgraph_body = [] + subgraph_body = {} # 设备信息 - subgraph_device = self._graph.group_map_device[subgraph_name] + subgraph_device = self.group_map_device[subgraph_name] # Construct input placeholder nodes - for inp in subgraphs_inputs[subgraph_name]: - node = self._graph.node_table[inp] - node_shape = node.tensor_meta["shape"] + for node in self._subgraphs_inputs[subgraph_name]: + if node.name in self._paral_op_shape.keys(): + node_shape = self._paral_op_shape[node.name] + else: + node_shape = node.tensor_meta["shape"] node_dtype = node.tensor_meta["dtype"] input_tensor_meta = TensorMeta(node_shape, node_dtype) subgraph_input.append(input_tensor_meta) placeholder_node = PlaceholderOp() - placeholder_node.name = inp + placeholder_node.name = node.name placeholder_node.tensor_meta = input_tensor_meta - for op in 
self._graph.op_groups[subgraph_name]: - if inp in node._parents: + for op in self.op_groups[subgraph_name]: + if node.name in node._parents: placeholder_node.add_children(op.name) - subgraph_body.append(placeholder_node) + subgraph_body[placeholder_node.name] = placeholder_node # Add operations to subgraph body - for op in self._graph.op_groups[subgraph_name]: - subgraph_body.append(op) - + for op in self.op_groups[subgraph_name]: + # 遍历当前子图的操作,切分与权重文件相关的操作 + # 与权重文件相关的操作指参数中包含权重矩阵或参数根据权重矩阵计算获得的操作 + + # ReshapeOp会改变shape,需要更新shape参数列表 + if isinstance(op, ViewOp): + if op.args[0] in self._paral_op_shape.keys(): + op._newshape = self._paral_op_shape[op.name] + subgraph_body[op.name] = op + # Construct output node output_node = OutputOp() output_node.name = "output" - for output in subgraphs_outputs[subgraph_name]: - output_node.add_argument(output) - output_node.add_parent(output) - subgraph_body.append(output_node) + for output in self._subgraphs_outputs[subgraph_name]: + output_node.add_argument(output.name) + output_node.add_parent(output.name) + subgraph_body[output_node.name] = output_node # Create subgraph and add it to the dictionary subgraph = Graph( @@ -187,12 +424,13 @@ def build_subgraph_by_group(self): subgraph_device, verbose=self._graph._verbose, ) - subgraph.body = subgraph_body - for op in subgraph_body: + subgraph.body = subgraph_body.values() + for op in subgraph_body.values(): subgraph.node_table[op.name] = op + # subgraph._output = output_node subgraphs[subgraph_name] = subgraph - return subgraphs, subgraphs_inputs, subgraphs_outputs + return subgraphs def topological_sort_subgraph(self): """ @@ -243,6 +481,12 @@ def construct_main_graph(self, do_param_pack=False): """ + # Analysis topology order to sort subgraph call. + topo_order = self.topological_sort_subgraph() + if topo_order == None: + print("Error : Graph Partitioning is illegal!") + return None + # Analysis topology order to sort subgraph call. 
topo_order = self.topological_sort_subgraph() if topo_order == None: @@ -260,9 +504,11 @@ def construct_main_graph(self, do_param_pack=False): # 为每个子图创建一个FuncOp节点,并将这些节点添加到主图中。 # Adding FuncOp nodes for each subgraph inputs0 = self._graph._inputs + split_group = [] + param_size_group = [] for i, subgraph_name in enumerate(self._subgraphs.keys()): main_graph_name = "forward{}".format(i) - current_param_indices = [] # 存储参数索引的列表 + current_param_info = {} # 存储参数索引和分割方式 main_graph = Graph( [], [], @@ -270,70 +516,117 @@ def construct_main_graph(self, do_param_pack=False): main_graph_name, self._graph._verbose, ) - # main_graph.node_table = node_table - + # 为每个子图创建一个FuncOp节点,并将这些节点添加到对应主图中。 + # FuncOp节点代表每个子图,用于主图对子图的调用 func_node = FuncOp() func_node.name = subgraph_name func_node.tensor_meta = {"shape": [], "dtype": []} for inp in self._subgraphs[subgraph_name]._inputs: func_node.add_argument(inp) - for output in self._subgraphs_outputs[subgraph_name]: - func_node.tensor_meta["shape"].append( - self._graph.node_table[output].tensor_meta["shape"] - ) - func_node.tensor_meta["dtype"].append( - self._graph.node_table[output].tensor_meta["dtype"] - ) + + outputs = self._subgraphs[subgraph_name]._outputs + if outputs is None: + for output in self._subgraphs_outputs[subgraph_name]: + func_node.tensor_meta["shape"].append( + self._graph.node_table[output.name].tensor_meta["shape"] + ) + func_node.tensor_meta["dtype"].append( + self._graph.node_table[output.name].tensor_meta["dtype"] + ) + else: + for out_node in outputs: + out_type = ir.RankedTensorType(out_node.type) + output_shape = list(out_type.shape) + func_node.tensor_meta["shape"].append(torch.Size(output_shape)) + for output in self._subgraphs_outputs[subgraph_name]: + func_node.tensor_meta["dtype"].append( + self._graph.node_table[output.name].tensor_meta["dtype"] + ) main_graph.add_node(func_node) # Adding placeholder operations from the original graph ph_count : int = 0 - for op in self._graph.body: - if 
isinstance(op, PlaceholderOp) : - if op.name in self._subgraphs_inputs[subgraph_name]: + # 记录子图中是否有权重矩阵被分割 + issplit = False + current_param_info["params"] = [] + current_param_info["total_partitions"] = 1 + split_group.append(1) + for node in self._graph.body: + if isinstance(node, PlaceholderOp) : + if node in self._subgraphs_inputs[subgraph_name]: if(len(self._graph._fake_params) > (ph_count)): main_graph._fake_params.append(self._graph._fake_params[ph_count]) - current_param_indices.append(ph_count) # 记录参数索引 - main_graph.add_node(op) + if node.name in self._paral_op_shape.keys(): + node._newshape = self._paral_op_shape[node.name] + main_graph._fake_params[-1]['shape'] = torch.Size(node._newshape) + current_param_info["params"].append( + {"index": ph_count, "split_degree": node._newshape} + ) + issplit = True + else: + current_param_info["params"].append( + {"index": ph_count, "split_degree": []} + ) + main_graph.add_node(node) ph_count += 1 - self._subgraph_param_indices[subgraph_name] = current_param_indices + param_size_group.append(self.get_pack_params_size(main_graph._fake_params)) + + if issplit: + current_param_info["total_partitions"] = self._parallelism + split_group[-1] = self._parallelism + self._subgraph_param_info[subgraph_name] = current_param_info # Identify inputs for each subgraph maingraph_input = inputs0 - for op in self._subgraphs_inputs[subgraph_name]: - if (op not in main_graph.node_table.keys()): - node = self._graph.node_table[op] - node_shape = node.tensor_meta["shape"] + for node in self._subgraphs_inputs[subgraph_name]: + if (node.name not in main_graph.node_table.keys()): + if node.name in self._paral_op_shape.keys(): + node_shape = self._paral_op_shape[node.name] + # issplit = True + else: + node_shape = node.tensor_meta["shape"] node_dtype = node.tensor_meta["dtype"] input_tensor_meta = TensorMeta(node_shape, node_dtype) maingraph_input.append(input_tensor_meta) placeholder_node = PlaceholderOp() - placeholder_node.name = op + 
placeholder_node.name = node.name placeholder_node.tensor_meta = input_tensor_meta main_graph._body.append(placeholder_node) - + # if issplit: + # current_param_info["total_partitions"] = self._parallelism # Adding CallOp to invoke the single subgraph call_node = CallOp() call_node.name = "call{}".format(i) call_node.call_func_name = subgraph_name call_node.tensor_meta = {"shape": [], "dtype": []} - for inp in self._subgraphs_inputs[subgraph_name]: - if inp in self._graph.node_table: - call_node.add_argument(inp) + for node in self._subgraphs_inputs[subgraph_name]: + if node.name in self._graph.node_table: + call_node.add_argument(node.name) continue for key, value in self._subgraphs_outputs.items(): - if inp in value: + if node in value: call_node.add_argument( arg=self._call_table[key].name, - arg_index=value.index(inp), + arg_index=value.index(node.name), ) break - for output in self._subgraphs_outputs[subgraph_name]: - call_node.tensor_meta["shape"].append( - self._graph.node_table[output].tensor_meta["shape"] - ) - call_node.tensor_meta["dtype"].append( - self._graph.node_table[output].tensor_meta["dtype"] - ) + outputs = self._subgraphs[subgraph_name]._outputs + if outputs is None: + for output in self._subgraphs_outputs[subgraph_name]: + call_node.tensor_meta["dtype"].append( + self._graph.node_table[output.name].tensor_meta["dtype"] + ) + call_node.tensor_meta["dtype"].append( + self._graph.node_table[output.name].tensor_meta["dtype"] + ) + else: + for out_node in outputs: + out_type = ir.RankedTensorType(out_node.type) + output_shape = list(out_type.shape) + call_node.tensor_meta["shape"].append(torch.Size(output_shape)) + for output in self._subgraphs_outputs[subgraph_name]: + call_node.tensor_meta["dtype"].append( + self._graph.node_table[output.name].tensor_meta["dtype"] + ) self._call_table[subgraph_name] = call_node main_graph.add_node(call_node) @@ -363,3 +656,85 @@ def construct_main_graph(self, do_param_pack=False): ) self._modules[main_graph_name] 
= main_importer.import_main_graph() inputs0 = [] + + print(f"split_group: {split_group}") + print(f"param_size_group: {param_size_group}") + + # 根据计算图分割的结果,构建子权重矩阵并打包为新权重文件 + def construct_sub_params(self, params, subgraph_entry, output_dir): + """ + 处理参数并根据 subgraph 的配置生成多个权重文件。 + + 参数: + params: 分离出的全部参数,由 params = dynamo_compiler.imported_params[graph]获得 + subgraph: 包含 'params'(参数配置列表) 和 'total_partitions' 键的字典, + 其中每个参数配置包括: + - "index": 在 params 中的索引 + - "split_degree": 分片形状 + output_dir: 输出目录,将在此目录中生成 arg0.data, arg1.data, ... 文件 + """ + subgraph_name, subgraph = subgraph_entry + total_partitions = subgraph["total_partitions"] + + # 为每个分区建立列表,存放各个参数(切分后的部分)的 flattened 数组 + partition_data = [[] for _ in range(total_partitions)] + + # 按 subgraph["params"] 中的顺序处理每个参数 + for param_info in subgraph["params"]: + idx = param_info["index"] + split_degree = param_info["split_degree"] + + # 从参数列表中获取 tensor + tensor = params[idx] + + # 将 tensor 转为 NumPy 数组 + np_tensor = tensor.detach().cpu().numpy() + orig_shape = np_tensor.shape + + if not split_degree: + # 不切分,完整 tensor 复制到每个权重矩阵 + flat = np_tensor.reshape(-1) + for part in range(total_partitions): + partition_data[part].append(flat) + + else: + # split_degree 给出每个切片的形状 + slice_shape = tuple(split_degree) + if len(orig_shape) != len(slice_shape): + raise ValueError( + f"参数索引 {idx} 的原始形状 {orig_shape} 与 split degree {slice_shape} 维度不匹配" + ) + # 确定切分轴:slice_shape[axis] * total_partitions == orig_shape[axis] + axis = None + for dim in range(len(orig_shape)): + if slice_shape[dim] * total_partitions == orig_shape[dim] and \ + all(slice_shape[d] == orig_shape[d] for d in range(len(orig_shape)) if d != dim): + axis = dim + break + if axis is None: + raise ValueError( + f"参数索引 {idx} 的 split degree {slice_shape} 无法与原始形状 {orig_shape} 匹配 (分区数={total_partitions})" + ) + # 按轴切分 + for part in range(total_partitions): + start = part * slice_shape[axis] + end = (part + 1) * slice_shape[axis] + slicer = [slice(None)] * len(orig_shape) 
+ slicer[axis] = slice(start, end) + sliced = np_tensor[tuple(slicer)] + partition_data[part].append(sliced.reshape(-1)) + + # 为每个分区将所有切分后的参数拼接,并写入输出文件 + for part in range(total_partitions): + # 若当前分区没有数据,也生成一个空文件 + if partition_data[part]: + concat_arr = np.concatenate(partition_data[part]) + else: + concat_arr = np.array([]) + filename = os.path.join(output_dir, f"{subgraph_name}_arg{part}.data") + concat_arr.tofile(filename) + + # # 输出调试信息 + # print(f"保存分区 {part} 权重到 {filename}") + # print(f"总元素数: {concat_arr.size}") + # print(f"内存占用: {concat_arr.nbytes/1024**2:.2f} MB\n") diff --git a/frontend/Python/graph/operation.py b/frontend/Python/graph/operation.py index a7901692..b2d9b8a9 100644 --- a/frontend/Python/graph/operation.py +++ b/frontend/Python/graph/operation.py @@ -118,10 +118,22 @@ def add_children(self, child): The user node's name to be added. """ self._children.append(child) + + def split_node(self, dim: int, parallel: int): + """ + Split the node into two nodes. + """ + shape = self._tensor_meta["shape"] + shape[dim] = shape[dim] / parallel + self._tensor_meta["shape"] = shape @property def args(self): return self._arguments + + @property + def parents(self): + return self._parents @property def kwargs(self): @@ -141,13 +153,14 @@ def tensor_meta(self): @tensor_meta.setter def tensor_meta(self, new_tensor_meta): - self._tensor_meta = new_tensor_meta + self._tensor_meta.update(new_tensor_meta) class PlaceholderOp(Op): def __init__(self) -> None: super().__init__() self._op_type = OpType.PlaceholderType + self._newshape: list = None class MatmulOp(Op): @@ -190,7 +203,7 @@ class ViewOp(Op): def __init__(self) -> None: super().__init__() self._op_type = OpType.ReshapeType - + self._newshape: list = None class EmbeddingOp(Op): def __init__(self) -> None: @@ -376,6 +389,7 @@ class ReshapeOp(Op): def __init__(self) -> None: super().__init__() self._op_type = OpType.ReshapeType + self._newshape: list = None class SelectOp(Op): diff --git 
a/frontend/Python/graph/type.py b/frontend/Python/graph/type.py index 7b89c55a..dcd35b7e 100644 --- a/frontend/Python/graph/type.py +++ b/frontend/Python/graph/type.py @@ -45,7 +45,7 @@ class TensorDType(Enum): Bool = "bool" -class TensorMeta: +class TensorMeta(dict): """ Store tensor metadata, including shape and data type, while overlooking raw data. @@ -65,8 +65,8 @@ class TensorMeta: meta = TensorMeta(shape=(3, 4), dtype='float32') # Access metadata attributes: meta.shape, meta.dtype """ - - def __init__(self, shape, dtype) -> None: + # 修复无法正确构造一个 TensorMeta 实例的问题,给出shape和dtype参数,构造出一个字典 + def __init__(self, shape, dtype): """ Initialize a new instance of the TensorMeta class. @@ -76,8 +76,27 @@ def __init__(self, shape, dtype) -> None: - dtype: str Represents the data type of the tensor. """ - self.shape = shape - self.dtype = dtype + super().__init__(shape=shape, dtype=dtype) + + @property + def shape(self): + """直接通过属性访问 shape""" + return self["shape"] + + @shape.setter + def shape(self, value): + """设置 shape 时会同步更新字典中的值""" + self["shape"] = value + + @property + def dtype(self): + """直接通过属性访问 dtype""" + return self["dtype"] + + @dtype.setter + def dtype(self, value): + """设置 dtype 时会同步更新字典中的值""" + self["dtype"] = value class DeviceType(Enum): """ diff --git a/frontend/Python/ops/func.py b/frontend/Python/ops/func.py index ec27258b..56555a6c 100644 --- a/frontend/Python/ops/func.py +++ b/frontend/Python/ops/func.py @@ -61,7 +61,8 @@ def call_op(node: CallOp, symbol_table: Dict[Tuple[str, int], ir.Operation]): arguments = [] # print(f"args: {str(node.args)}") for i, arg in enumerate(node.args): - input_node = symbol_table.get((str(arg), node._args_index[i])) + key = (str(arg), node._args_index[i]) + input_node = symbol_table.get(key) # print(f"symbol_table: {symbol_table}") # print(f"str: {str(arg)}, input_node: {input_node.type}") if input_node is None: diff --git a/frontend/Python/ops/linalg.py b/frontend/Python/ops/linalg.py index a8feacae..efbcf6fc 100644 
--- a/frontend/Python/ops/linalg.py +++ b/frontend/Python/ops/linalg.py @@ -18,6 +18,7 @@ # # ===--------------------------------------------------------------------------- +import torch from typing import Dict, Tuple, List import mlir.ir as ir @@ -820,7 +821,7 @@ def pow_op( if input1 is None: return value = node.args[1] - output_shape = list(node.tensor_meta["shape"]) + output_shape = ir.RankedTensorType(input1.type).shape dtype = node.tensor_meta["dtype"] dtype = mlir_element_type_get(dtype) if not isinstance(value, str): @@ -1160,7 +1161,19 @@ def matmul_op( if input1 is None or input2 is None: return - output_shape = list(node.tensor_meta["shape"]) + # input1_node = node._arg_ops[0] + # input2_node = node._arg_ops[1] + # op_arg1_shape = input1_node.tensor_meta["shape"] + # op_arg2_shape = input2_node.tensor_meta["shape"] + # print(op_arg1_shape, op_arg2_shape) + # op_arg1_list = list(op_arg1_shape) + # op_arg2_list = list(op_arg2_shape) + # mutable_shape = [op_arg1_list[0], op_arg2_list[1]] + # output_shape = torch.Size(mutable_shape) + # # print(output_shape) + input1_shape = ir.RankedTensorType(input1.type).shape + input2_shape = ir.RankedTensorType(input2.type).shape + output_shape = [input1_shape[0], input2_shape[1]] dtype = node.tensor_meta["dtype"] mlir_dtype = mlir_element_type_get(dtype) tensor_type = ir.RankedTensorType.get(output_shape, mlir_dtype) @@ -1350,7 +1363,8 @@ def neg_op( input1 = symbol_table.get((str(node.args[0]), 0)) if input1 is None: return - output_shape = list(node.tensor_meta["shape"]) + input1_shape = ir.RankedTensorType(input1.type).shape + output_shape = list(input1_shape) dtype = node.tensor_meta["dtype"] mlir_dtype = mlir_element_type_get(dtype) output = tensor.EmptyOp(output_shape, mlir_dtype) @@ -1383,8 +1397,10 @@ def cat_op( dim = int(node.args[1]) if input1 is None or input2 is None: return + input1_shape = ir.RankedTensorType(input1.type).shape + input2_shape = ir.RankedTensorType(input2.type).shape + output_shape = 
input1_shape[:-1] + [input2_shape[-1] + input1_shape[-1]] - output_shape = list(node.tensor_meta["shape"]) if dim < 0: dim = len(output_shape) + dim dtype = node.tensor_meta["dtype"] @@ -1392,7 +1408,6 @@ def cat_op( output = tensor.EmptyOp(output_shape, mlir_dtype) offset = [0 for x in output_shape] offset_attr = ir._denseI64ArrayAttr(offset, None) - input1_shape = ir.RankedTensorType(input1.type).shape size_attr = ir._denseI64ArrayAttr(input1_shape, None) stride_attr = ir._denseI64ArrayAttr([1] * len(offset), None) insert_input1 = tensor.InsertSliceOp( @@ -1407,7 +1422,6 @@ def cat_op( ) offset[dim] += input1_shape[dim] offset_attr = ir._denseI64ArrayAttr(offset, None) - input2_shape = ir.RankedTensorType(input2.type).shape size_attr = ir._denseI64ArrayAttr(input2_shape, None) insert_input2 = tensor.InsertSliceOp( input2, diff --git a/frontend/Python/ops/tosa.py b/frontend/Python/ops/tosa.py index f29c3218..ed0af753 100644 --- a/frontend/Python/ops/tosa.py +++ b/frontend/Python/ops/tosa.py @@ -428,14 +428,20 @@ def reshape_op(node: ReshapeOp, symbol_table): shape will be inferred automatically. """ input1 = symbol_table.get((str(node.args[0]), 0)) - new_shape = [] - for i in node.args[1]: - new_shape.append(i) + input1_shape = ir.RankedTensorType(input1.type).shape + total_size = 1 - now_shape = ir.RankedTensorType(input1.type).shape - for dim_siz in now_shape: + for dim_siz in input1_shape: total_size *= dim_siz + new_shape = [] + if node._newshape is None: + for i in node.args[1]: + new_shape.append(i) + else: + for i in node._newshape: + new_shape.append(i) + neg_one_cnt = 0 rest_size = 1 for dim_siz in new_shape: @@ -691,7 +697,6 @@ def var_mean_op(node: VarMeanOp, symbol_table): `keepdim` argument is supported. It's handled by the applying a `reshape` operation. 
- """ def mean_dim_op(_input_tensor: ir.Value, _dim) -> ir.Operation: @@ -1433,7 +1438,8 @@ def sigmoid_op(node: SigmoidOp, symbol_table): input1 = symbol_table.get((str(node.args[0]), 0)) if input1 is None: return - output_shape = list(node.tensor_meta["shape"]) + input_shape = ir.RankedTensorType(input1.type).shape + output_shape = list(input_shape) dtype = node.tensor_meta["dtype"] mlir_dtype = mlir_element_type_get(dtype) tensor_type = ir.RankedTensorType.get(output_shape, mlir_dtype) @@ -1752,15 +1758,15 @@ def scaled_dot_product_flash_attention_for_cpu_op( log_sumexp = tosa.AddOp(max_vals.result.type, max_vals, log_op) log_weights = tosa.SubOp(add_op.result.type, add_op, log_sumexp) softmax_result = math.ExpOp(log_weights) - log_sumexp = tosa.ReshapeOp( - log_sumexp, - memoryview( - array.array( - "i", - output_shape[1], - ) - ), - ) + # log_sumexp = tosa.ReshapeOp( + # log_sumexp, + # memoryview( + # array.array( + # "i", + # output_shape[1], + # ) + # ), + # ) # This step includes dropout during training. # Multiply the result by the value tensor. @@ -1792,8 +1798,8 @@ def scaled_dot_product_flash_attention_for_cpu_op( ) ), ) - - return result_reshape_op, log_sumexp + return result_reshape_op + # return result_reshape_op, log_sumexp ops_registry = {