JFIFXX    $.' ",#(7),01444'9=82<.342  2!!22222222222222222222222222222222222222222222222222"4 ,PG"Z_4˷kjزZ,F+_z,© zh6٨icfu#ډb_N?wQ5-~I8TK<5oIv-k_U_~bMdӜUHh?]EwQk{_}qFW7HTՑYF?_'ϔ_Ջt=||I 6έ"D/[k9Y8ds|\Ҿp6Ҵ].6znopM[mei$[soᘨ˸ nɜG-ĨUycP3.DBli;hjx7Z^NhN3u{:jx힞#M&jL P@_ P&o89@Sz6t7#Oߋ s}YfTlmrZ)'Nk۞pw\Tȯ?8`Oi{wﭹW[r Q4F׊3m&L=h3z~#\l :F,j@ ʱwQT8"kJO6֚l}R>ډK]y&p}b;N1mr$|7>e@BTM*-iHgD) Em|ؘbҗaҾt4oG*oCNrPQ@z,|?W[0:n,jWiEW$~/hp\?{(0+Y8rΟ+>S-SVN;}s?. w9˟<Mq4Wv'{)01mBVW[8/< %wT^5b)iM pgN&ݝVO~qu9 !J27$O-! :%H ـyΠM=t{!S oK8txA& j0 vF Y|y ~6@c1vOpIg4lODL Rcj_uX63?nkWyf;^*B @~a`Eu+6L.ü>}y}_O6͐:YrGXkGl^w~㒶syIu! W XN7BVO!X2wvGRfT#t/?%8^WaTGcLMI(J1~8?aT ]ASE(*E} 2#I/׍qz^t̔bYz4xt){ OH+(EA&NXTo"XC')}Jzp ~5}^+6wcQ|LpdH}(.|kc4^"Z?ȕ a<L!039C EuCFEwç ;n?*oB8bʝ'#RqfM}7]s2tcS{\icTx;\7KPʇ Z O-~c>"?PEO8@8GQgaՎ󁶠䧘_%#r>1zaebqcPѵn#L =׀t L7`VA{C:ge@w1 Xp3c3ġpM"'-@n4fGB3DJ8[JoߐgK)ƛ$ 83+ 6ʻ SkI*KZlT _`?KQKdB`s}>`*>,*@JdoF*弝O}ks]yߘc1GV<=776qPTtXԀ!9*44Tހ3XΛex46YD  BdemDa\_l,G/֌7Y](xTt^%GE4}bTڹ;Y)BQu>J/J ⮶.XԄjݳ+Ed r5_D1 o Bx΢#<W8R6@gM. drD>(otU@x=~v2 ӣdoBd3eO6㣷ݜ66YQz`S{\P~z m5{J/L1xO\ZFu>ck#&:`$ai>2ΔloF[hlEܺΠk:)` $[69kOw\|8}ބ:񶐕IA1/=2[,!.}gN#ub ~݊}34qdELc$"[qU硬g^%B zrpJru%v\h1Yne`ǥ:gpQM~^Xi `S:V29.PV?Bk AEvw%_9CQwKekPؠ\;Io d{ ߞoc1eP\ `E=@KIRYK2NPlLɀ)&eB+ь( JTx_?EZ }@ 6U뙢طzdWIn` D噥[uV"G&Ú2g}&m?ċ"Om# {ON"SXNeysQ@FnVgdX~nj]J58up~.`r\O,ư0oS _Ml4kv\JSdxSW<AeIX$Iw:Sy›R9Q[,5;@]%u@ *rolbI  +%m:͇ZVủθau,RW33 dJeTYE.Mϧ-oj3+yy^cVO9NV\nd1 !͕_)av;թMlWR1)ElP;yوÏu 3k5Pr6<⒲l!˞*u־n!l:UNW %Chx8vL'X@*)̮ˍ D-M+JUkvK+x8cY?Ԡ~3mo|u@[XeYC\Kpx8oCC&N~3-H MXsu<`~"WL$8ξ3a)|:@m\^`@ҷ)5p+6p%i)P Mngc#0AruzRL+xSS?ʮ}()#tmˇ!0}}y$6Lt;$ʳ{^6{v6ķܰgVcnn ~zx«,2u?cE+ȘH؎%Za)X>uWTzNyosFQƤ$*&LLXL)1" LeOɟ9=:tZcŽY?ӭVwv~,Yrۗ|yGaFC.+ v1fήJ]STBn5sW}y$~z'c 8  ,! pVNSNNqy8z˱A4*'2n<s^ǧ˭PJޮɏUGLJ*#i}K%,)[z21z ?Nin1?TIR#m-1lA`fT5+ܐcq՝ʐ,3f2Uեmab#ŠdQy>\)SLYw#.ʑf ,"+w~N'cO3FN<)j&,- љ֊_zSTǦw>?nU仆Ve0$CdrP m׈eXmVu L.bֹ [Դaզ*\y8Է:Ez\0KqC b̘cөQ=0YsNS.3.Oo:#v7[#߫ 5܎LEr49nCOWlG^0k%;YߝZǓ:S#|}y,/kLd TA(AI$+I3;Y*Z}|ӧOdv..#:nf>>ȶITX 8y"dR|)0=n46ⲑ+ra ~]R̲c?6(q;5% |uj~z8R=XIV=|{vGj\gcqz؋%Mߍ1y#@f^^>N#x#۹6Y~?dfPO{P4Vu1E1J *|%JN`eWuzk M6q t[ gGvWIGu_ft5j"Y:Tɐ*; e54q$C2d} _SL#mYpO.C;cHi#֩%+) ӍƲVSYźg |tj38r|V1#;.SQA[S#`n+$$I P\[@s(EDzP])8G#0B[ىXIIq<9~[Z멜Z⊔IWU&A>P~#dp]9 "cP Md?٥Ifتuk/F9c*9Ǎ:ØFzn*@|Iށ9N3{'['ͬҲ4#}!V Fu,,mTIkv C7vB6kT91*l '~ƞFlU'M ][ΩũJ_{iIn$L jOdxkza۪#EClx˘oVɞljr)/,߬hL#^Lф,íMƁe̩NBLiLq}(q6IçJ$WE$:=#(KBzђ xlx?>Պ+>W,Ly!_DŌlQ![ SJ1ƐY}b,+Loxɓ)=yoh@꥟/Iѭ=Py9 ۍYӘe+pJnϱ?V\SO%(t =?MR[Șd/ nlB7j !;ӥ/[-A>dNsLj ,ɪv=1c.SQO3UƀܽE̻9GϷD7(}Ävӌ\y_0[w <΍>a_[0+LF.޺f>oNTq;y\bՃyjH<|q-eɏ_?_9+PHp$[uxK wMwNی'$Y2=qKBP~Yul:[<F12O5=d]Ysw:ϮEj,_QXz`H1,#II dwrP˂@ZJVy$\y{}^~[:NߌUOdؾe${p>G3cĖlʌ ת[`ϱ-WdgIig2 }s ؤ(%#sS@~3XnRG~\jc3vӍLM[JBTs3}jNʖW;7ç?=XF=-=qߚ#='c7ڑWI(O+=:uxqe2zi+kuGR0&eniT^J~\jyp'dtGsO39* b#Ɋ p[BwsT>d4ۧsnvnU_~,vƜJ1s QIz)(lv8MU=;56Gs#KMP=LvyGd}VwWBF'à ?MHUg2 !p7Qjڴ=ju JnA suMeƆҔ!)'8Ϣٔޝ(Vpצ֖d=ICJǠ{qkԭ߸i@Ku|p=..*+xz[Aqġ#s2aƊRR)*HRsi~a &fMP-KL@ZXy'x{}Zm+:)) IJ-iu ܒH'L(7yGӜq j 6ߌg1go,kرtY?W,pefOQS!K۟cҒA|սj>=⬒˧L[ ߿2JaB~Ru:Q] 0H~]7ƼI(}cq 'ήETq?fabӥvr )o-Q_'ᴎoK;Vo%~OK *bf:-ťIR`B5!RB@ï u ̯e\_U_ gES3QTaxU<~c?*#]MW,[8Oax]1bC|踤Plw5V%){t<d50iXSUm:Z┵i"1^B-PhJ&)O*DcWvM)}Pܗ-q\mmζZ-l@}aE6F@&Sg@ݚM ȹ 4#p\HdYDoH"\..RBHz_/5˘6KhJRPmƶim3,#ccoqa)*PtRmk7xDE\Y閣_X<~)c[[BP6YqS0%_;Àv~| VS؇ 'O0F0\U-d@7SJ*z3nyPOm~P3|Yʉr#CSN@ ƮRN)r"C:: #qbY. 6[2K2uǦHYRQMV G$Q+.>nNHq^ qmMVD+-#*U̒ p욳u:IBmPV@Or[b= 1UE_NmyKbNOU}the`|6֮P>\2PVIDiPO;9rmAHGWS]J*_G+kP2KaZH'KxWMZ%OYDRc+o?qGhmdSoh\D|:WUAQc yTq~^H/#pCZTI1ӏT4"ČZ}`w#*,ʹ 0i課Om*da^gJ݅{le9uF#Tֲ̲ٞC"qߍ ոޑo#XZTp@ o8(jdxw],f`~|,s^f1t|m򸄭/ctr5s79Q4H1꠲BB@l9@C+wpxu£Yc9?`@#omHs2)=2.ljg9$YS%*LRY7Z,*=䷘$armoϰUW.|rufIGwtZwo~5 YյhO+=8fF)W7L9lM̘·Y֘YLf큹pRF99.A "wz=E\Z'a 2Ǚ#;'}G*l^"q+2FQ hjkŦ${ޮ-T٭cf|3#~RJt$b(R(rdx >U b&9,>%E\ Άe$'q't*אެb-|dSBOO$R+H)܎K1m`;J2Y~9Og8=vqD`K[F)k[1m޼cn]skz$@)!I x՝"v9=ZA=`Ɠi :E)`7vI}dYI_ o:obo 3Q&D&2= Ά;>hy.*ⅥSӬ+q&j|UƧ}J0WW< ۋS)jQRjƯrN)Gű4Ѷ(S)Ǣ8iW52No˓ ۍ%5brOnL;n\G=^UdI8$&h'+(cȁ߫klS^cƗjԌEꭔgFȒ@}O*;evWVYJ\]X'5ղkFb 6Ro՜mi Ni>J?lPmU}>_Z&KKqrIDՉ~q3fL:Se>E-G{L6pe,8QIhaXaUA'ʂs+טIjP-y8ۈZ?J$WP Rs]|l(ԓsƊio(S0Y 8T97.WiLc~dxcE|2!XKƘਫ਼$((6~|d9u+qd^389Y6L.I?iIq9)O/뚅OXXVZF[یgQLK1RҖr@v#XlFНyS87kF!AsM^rkpjPDyS$Nqnxҍ!Uf!ehi2m`YI9r6 TFC}/y^Η5d'9A-J>{_l+`A['յϛ#w:݅%X}&PStQ"-\縵/$ƗhXb*yBS;Wջ_mcvt?2}1;qSdd~u:2k52R~z+|HE!)Ǟl7`0<,2*Hl-x^'_TVgZA'j ^2ΪN7t?w x1fIzC-ȖK^q;-WDvT78Z hK(P:Q- 8nZ܃e貾<1YT<,"6{/ ?͟|1:#gW>$dJdB=jf[%rE^il:BxSּ1հ,=*7 fcG#q eh?27,!7x6nLC4x},GeǝtC.vS F43zz\;QYC,6~;RYS/6|25vTimlv& nRh^ejRLGf? ۉҬܦƩ|Ȱ>3!viʯ>vオX3e_1zKȗ\qHS,EW[㺨uch⍸O}a>q6n6N6qN ! 1AQaq0@"2BRb#Pr3C`Scst$4D%Td ?Na3mCwxAmqmm$4n淿t'C"wzU=D\R+wp+YT&պ@ƃ3ޯ?AﶂaŘ@-Q=9Dռѻ@MVP܅G5fY6# ?0UQ,IX(6ڵ[DIMNލc&υj\XR|,4 jThAe^db#$]wOӪ1y%LYm뭛CUƃߜ}Cy1XνmF8jI]HۺиE@Ii;r8ӭVFՇ| &?3|xBMuSGe=Ӕ#BE5GY!z_eqр/W>|-Ci߇t1ޯќdR3ug=0 5[?#͏qcfH{ ?u=??ǯ}ZzhmΔBFTWPxs}G93 )gGR<>r h$'nchPBjJҧH -N1N?~}-q!=_2hcMlvY%UE@|vM2.Y[|y"EïKZF,ɯ?,q?vM 80jx";9vk+ ֧ ȺU?%vcVmA6Qg^MA}3nl QRNl8kkn'(M7m9وq%ޟ*h$Zk"$9: ?U8Sl,,|ɒxH(ѷGn/Q4PG%Ա8N! &7;eKM749R/%lc>x;>C:th?aKXbheᜋ^$Iհ hr7%F$EFdt5+(M6tÜUU|zW=aTsTgdqPQb'm1{|YXNb P~F^F:k6"j! Ir`1&-$Bevk:y#ywI0x=D4tUPZHڠ底taP6b>xaQ# WeFŮNjpJ* mQN*I-*ȩFg3 5Vʊɮa5FO@{NX?H]31Ri_uѕ 0 F~:60p͈SqX#a5>`o&+<2D: ڝ$nP*)N|yEjF5ټeihyZ >kbHavh-#!Po=@k̆IEN@}Ll?jO߭ʞQ|A07xwt!xfI2?Z<ץTcUj]陎Ltl }5ϓ$,Omˊ;@OjEj(ا,LXLOЦ90O .anA7j4 W_ٓzWjcBy՗+EM)dNg6y1_xp$Lv:9"zpʙ$^JԼ*ϭo=xLj6Ju82AH3$ٕ@=Vv]'qEz;I˼)=ɯx /W(Vp$ mu񶤑OqˎTr㠚xsrGCbypG1ߠw e8$⿄/M{*}W]˷.CK\ުx/$WPwr |i&}{X >$-l?-zglΆ(FhvS*b߲ڡn,|)mrH[a3ר[13o_U3TC$(=)0kgP u^=4 WYCҸ:vQרXàtkm,t*^,}D* "(I9R>``[~Q]#afi6l86:,ssN6j"A4IuQ6E,GnHzSHOuk5$I4ؤQ9@CwpBGv[]uOv0I4\yQѸ~>Z8Taqޣ;za/SI:ܫ_|>=Z8:SUIJ"IY8%b8H:QO6;7ISJҌAά3>cE+&jf$eC+z;V rʺmyeaQf&6ND.:NTvm<- uǝ\MvZYNNT-A>jr!SnO 13Ns%3D@`ܟ 1^c< aɽ̲Xë#w|ycW=9I*H8p^(4՗karOcWtO\ƍR8'KIQ?5>[}yUײ -h=% qThG2)"ו3]!kB*pFDlA,eEiHfPs5H:Փ~H0DتDIhF3c2E9H5zԑʚiX=:mxghd(v׊9iSOd@0ڽ:p5h-t&Xqӕ,ie|7A2O%PEhtjY1wЃ!  ࢽMy7\a@ţJ 4ȻF@o̒?4wx)]P~u57X 9^ܩU;Iꭆ 5 eK27({|Y׎ V\"Z1 Z}(Ǝ"1S_vE30>p; ΝD%xW?W?vo^Vidr[/&>~`9Why;R ;;ɮT?r$g1KACcKl:'3 cﳯ*"t8~l)m+U,z`(>yJ?h>]vЍG*{`;y]IT ;cNUfo¾h/$|NS1S"HVT4uhǜ]v;5͠x'C\SBplh}N ABx%ޭl/Twʽ]D=Kžr㻠l4SO?=k M: cCa#ha)ѐxcsgPiG{+xQI= zԫ+ 8"kñj=|c yCF/*9жh{ ?4o kmQNx;Y4膚aw?6>e]Qr:g,i"ԩA*M7qB?ӕFhV25r[7 Y }LR}*sg+xr2U=*'WSZDW]WǞ<叓{$9Ou4y90-1'*D`c^o?(9uݐ'PI& fJݮ:wSjfP1F:X H9dԯ˝[_54 }*;@ܨ ðynT?ןd#4rGͨH1|-#MrS3G3).᧏3vz֑r$G"`j 1tx0<ƆWh6y6,œGagAyb)hDß_mü gG;evݝnQ C-*oyaMI><]obD":GA-\%LT8c)+y76oQ#*{(F⽕y=rW\p۩cA^e6KʐcVf5$'->ՉN"F"UQ@fGb~#&M=8טJNu9D[̤so~ G9TtW^g5y$bY'سǴ=U-2 #MCt(i lj@Q 5̣i*OsxKf}\M{EV{υƇ);HIfeLȣr2>WIȂ6ik 5YOxȺ>Yf5'|H+98pjn.OyjY~iw'l;s2Y:'lgꥴ)o#'SaaKZ m}`169n"xI *+ }FP"l45'ZgE8?[X7(.Q-*ތL@̲v.5[=t\+CNܛ,gSQnH}*FG16&:t4ُ"Ạ$b |#rsaT ]ӽDP7ո0y)e$ٕvIh'QEAm*HRI=: 4牢) %_iNݧl] NtGHL ɱg<1V,J~ٹ"KQ 9HS9?@kr;we݁]I!{ @G["`J:n]{cAEVʆ#U96j#Ym\qe4hB7Cdv\MNgmAyQL4uLjj9#44tl^}LnR!t±]rh6ٍ>yҏNfU  Fm@8}/ujb9he:AyծwGpΧh5l}3p468)Udc;Us/֔YX1O2uqs`hwgr~{ RmhN؎*q 42*th>#E#HvOq}6e\,Wk#Xb>p}դ3T5†6[@Py*n|'f֧>lư΂̺SU'*qp_SM 'c6m ySʨ;MrƋmKxo,GmPAG:iw9}M(^V$ǒѽ9| aJSQarB;}ٻ֢2%Uc#gNaݕ'v[OY'3L3;,p]@S{lsX'cjwk'a.}}& dP*bK=ɍ!;3ngΊUߴmt'*{,=SzfD Ako~Gaoq_mi}#mPXhύmxǍ΂巿zfQc|kc?WY$_Lvl߶c`?ljݲˏ!V6UЂ(A4y)HpZ_x>eR$/`^'3qˏ-&Q=?CFVR DfV9{8gnh(P"6[D< E~0<@`G6Hгcc cK.5DdB`?XQ2ٿyqo&+1^ DW0ꊩG#QnL3c/x 11[yxპCWCcUĨ80me4.{muI=f0QRls9f9~fǨa"@8ȁQ#cicG$Gr/$W(WV"m7[mAmboD j۳ l^kh׽ # iXnveTka^Y4BNĕ0 !01@Q"2AaPq3BR?@4QT3,㺠W[=JKϞ2r^7vc:9 EߴwS#dIxu:Hp9E! V 2;73|F9Y*ʬFDu&y؟^EAA(ɩ^GV:ݜDy`Jr29ܾ㝉[E;FzxYGUeYC v-txIsםĘqEb+P\ :>iC';k|zرny]#ǿbQw(r|ӹs[D2v-%@;8<a[\o[ϧwI!*0krs)[J9^ʜp1) "/_>o<1AEy^C`x1'ܣnps`lfQ):lb>MejH^?kl3(z:1ŠK&?Q~{ٺhy/[V|6}KbXmn[-75q94dmc^h X5G-}دBޟ |rtMV+]c?-#ڛ^ǂ}LkrOu>-Dry D?:ޞUǜ7V?瓮"#rչģVR;n/_ ؉vݶe5db9/O009G5nWJpA*r9>1.[tsFnQ V 77R]ɫ8_0<՜IFu(v4Fk3E)N:yڮeP`1}$WSJSQNjٺ޵#lј(5=5lǏmoWv-1v,Wmn߀$x_DȬ0¤#QR[Vkzmw"9ZG7'[=Qj8R?zf\a=OU*oBA|G254 p.w7  &ξxGHp B%$gtЏ򤵍zHNuЯ-'40;_3 !01"@AQa2Pq#3BR?ʩcaen^8F<7;EA{EÖ1U/#d1an.1ě0ʾRh|RAo3m3 % 28Q yφHTo7lW>#i`qca m,B-j݋'mR1Ήt>Vps0IbIC.1Rea]H64B>o]($Bma!=?B KǾ+Ծ"nK*+[T#{EJSQs5:U\wĐf3܆&)IԆwE TlrTf6Q|Rh:[K zc֧GC%\_a84HcObiؖV7H )*ģK~Xhչ04?0 E<}3#u? |gS6ꊤ|I#Hڛ աwX97Ŀ%SLy6č|Fa 8b$sקhb9RAu7˨pČ_\*w묦F 4D~f|("mNKiS>$d7SlA/²SL|6N}S˯g]6; #. 403WebShell
403Webshell
Server IP : 173.199.190.172  /  Your IP : 216.73.216.167
Web Server : Apache
System : Linux chs1.nescrow.com.ng 3.10.0-1160.119.1.el7.x86_64 #1 SMP Tue Jun 4 14:43:51 UTC 2024 x86_64
User : oysipaoygov ( 1026)
PHP Version : 5.6.40
Disable Function : exec,passthru,shell_exec,system
MySQL : ON  |  cURL : ON  |  WGET : ON  |  Perl : ON  |  Python : ON  |  Sudo : ON  |  Pkexec : ON
Directory :  /lib/gcc/x86_64-redhat-linux/4.8.5/include/

Upload File :
current_dir [ Writeable ] document_root [ Writeable ]

 

Command :


[ Back ]     

Current File : /lib/gcc/x86_64-redhat-linux/4.8.5/include/avx2intrin.h
/* Copyright (C) 2011-2013 Free Software Foundation, Inc.

   This file is part of GCC.

   GCC is free software; you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
   the Free Software Foundation; either version 3, or (at your option)
   any later version.

   GCC is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   GNU General Public License for more details.

   Under Section 7 of GPL version 3, you are granted additional
   permissions described in the GCC Runtime Library Exception, version
   3.1, as published by the Free Software Foundation.

   You should have received a copy of the GNU General Public License and
   a copy of the GCC Runtime Library Exception along with this program;
   see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
   <http://www.gnu.org/licenses/>.  */

#ifndef _IMMINTRIN_H_INCLUDED
# error "Never use <avx2intrin.h> directly; include <immintrin.h> instead."
#endif

/* Sum absolute 8-bit integer difference of adjacent groups of 4
   byte integers in the first 2 operands.  Starting offsets within
   operands are determined by the 3rd mask operand.  */
#ifdef __OPTIMIZE__
extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_mpsadbw_epu8 (__m256i __X, __m256i __Y, const int __M)
{
  return (__m256i) __builtin_ia32_mpsadbw256 ((__v32qi)__X,
					      (__v32qi)__Y, __M);
}
#else
#define _mm256_mpsadbw_epu8(X, Y, M)					\
  ((__m256i) __builtin_ia32_mpsadbw256 ((__v32qi)(__m256i)(X),		\
					(__v32qi)(__m256i)(Y), (int)(M)))
#endif

extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_abs_epi8 (__m256i __A)
{
  return (__m256i)__builtin_ia32_pabsb256 ((__v32qi)__A);
}

extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_abs_epi16 (__m256i __A)
{
  return (__m256i)__builtin_ia32_pabsw256 ((__v16hi)__A);
}

extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_abs_epi32 (__m256i __A)
{
  return (__m256i)__builtin_ia32_pabsd256 ((__v8si)__A);
}

extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_packs_epi32 (__m256i __A, __m256i __B)
{
  return (__m256i)__builtin_ia32_packssdw256 ((__v8si)__A, (__v8si)__B);
}

extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_packs_epi16 (__m256i __A, __m256i __B)
{
  return (__m256i)__builtin_ia32_packsswb256 ((__v16hi)__A, (__v16hi)__B);
}

extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_packus_epi32 (__m256i __A, __m256i __B)
{
  return (__m256i)__builtin_ia32_packusdw256 ((__v8si)__A, (__v8si)__B);
}

extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_packus_epi16 (__m256i __A, __m256i __B)
{
  return (__m256i)__builtin_ia32_packuswb256 ((__v16hi)__A, (__v16hi)__B);
}

extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_add_epi8 (__m256i __A, __m256i __B)
{
  return (__m256i)__builtin_ia32_paddb256 ((__v32qi)__A, (__v32qi)__B);
}

extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_add_epi16 (__m256i __A, __m256i __B)
{
  return (__m256i)__builtin_ia32_paddw256 ((__v16hi)__A, (__v16hi)__B);
}

extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_add_epi32 (__m256i __A, __m256i __B)
{
  return (__m256i)__builtin_ia32_paddd256 ((__v8si)__A, (__v8si)__B);
}

extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_add_epi64 (__m256i __A, __m256i __B)
{
  return (__m256i)__builtin_ia32_paddq256 ((__v4di)__A, (__v4di)__B);
}

extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_adds_epi8 (__m256i __A, __m256i __B)
{
  return (__m256i)__builtin_ia32_paddsb256 ((__v32qi)__A, (__v32qi)__B);
}

extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_adds_epi16 (__m256i __A, __m256i __B)
{
  return (__m256i)__builtin_ia32_paddsw256 ((__v16hi)__A, (__v16hi)__B);
}

extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_adds_epu8 (__m256i __A, __m256i __B)
{
  return (__m256i)__builtin_ia32_paddusb256 ((__v32qi)__A, (__v32qi)__B);
}

extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_adds_epu16 (__m256i __A, __m256i __B)
{
  return (__m256i)__builtin_ia32_paddusw256 ((__v16hi)__A, (__v16hi)__B);
}

#ifdef __OPTIMIZE__
extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_alignr_epi8 (__m256i __A, __m256i __B, const int __N)
{
  return (__m256i) __builtin_ia32_palignr256 ((__v4di)__A,
					      (__v4di)__B,
					      __N * 8);
}
#else
/* In that case (__N*8) will be in vreg, and insn will not be matched. */
/* Use define instead */
#define _mm256_alignr_epi8(A, B, N)				   \
  ((__m256i) __builtin_ia32_palignr256 ((__v4di)(__m256i)(A),	   \
					(__v4di)(__m256i)(B),	   \
					(int)(N) * 8))
#endif

extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_and_si256 (__m256i __A, __m256i __B)
{
  return (__m256i) __builtin_ia32_andsi256 ((__v4di)__A, (__v4di)__B);
}

extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_andnot_si256 (__m256i __A, __m256i __B)
{
  return (__m256i) __builtin_ia32_andnotsi256 ((__v4di)__A, (__v4di)__B);
}

extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_avg_epu8 (__m256i __A, __m256i __B)
{
  return (__m256i)__builtin_ia32_pavgb256 ((__v32qi)__A, (__v32qi)__B);
}

extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_avg_epu16 (__m256i __A, __m256i __B)
{
  return (__m256i)__builtin_ia32_pavgw256 ((__v16hi)__A, (__v16hi)__B);
}

extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_blendv_epi8 (__m256i __X, __m256i __Y, __m256i __M)
{
  return (__m256i) __builtin_ia32_pblendvb256 ((__v32qi)__X,
					       (__v32qi)__Y,
					       (__v32qi)__M);
}

#ifdef __OPTIMIZE__
extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_blend_epi16 (__m256i __X, __m256i __Y, const int __M)
{
  return (__m256i) __builtin_ia32_pblendw256 ((__v16hi)__X,
					      (__v16hi)__Y,
					       __M);
}
#else
#define _mm256_blend_epi16(X, Y, M)					\
  ((__m256i) __builtin_ia32_pblendw256 ((__v16hi)(__m256i)(X),		\
					(__v16hi)(__m256i)(Y), (int)(M)))
#endif

extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_cmpeq_epi8 (__m256i __A, __m256i __B)
{
  return (__m256i)__builtin_ia32_pcmpeqb256 ((__v32qi)__A, (__v32qi)__B);
}

extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_cmpeq_epi16 (__m256i __A, __m256i __B)
{
  return (__m256i)__builtin_ia32_pcmpeqw256 ((__v16hi)__A, (__v16hi)__B);
}

extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_cmpeq_epi32 (__m256i __A, __m256i __B)
{
  return (__m256i)__builtin_ia32_pcmpeqd256 ((__v8si)__A, (__v8si)__B);
}

extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_cmpeq_epi64 (__m256i __A, __m256i __B)
{
  return (__m256i)__builtin_ia32_pcmpeqq256 ((__v4di)__A, (__v4di)__B);
}

extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_cmpgt_epi8 (__m256i __A, __m256i __B)
{
  return (__m256i)__builtin_ia32_pcmpgtb256 ((__v32qi)__A,
					     (__v32qi)__B);
}

extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_cmpgt_epi16 (__m256i __A, __m256i __B)
{
  return (__m256i)__builtin_ia32_pcmpgtw256 ((__v16hi)__A,
					     (__v16hi)__B);
}

extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_cmpgt_epi32 (__m256i __A, __m256i __B)
{
  return (__m256i)__builtin_ia32_pcmpgtd256 ((__v8si)__A,
					     (__v8si)__B);
}

extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_cmpgt_epi64 (__m256i __A, __m256i __B)
{
  return (__m256i)__builtin_ia32_pcmpgtq256 ((__v4di)__A, (__v4di)__B);
}

extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_hadd_epi16 (__m256i __X, __m256i __Y)
{
  return (__m256i) __builtin_ia32_phaddw256 ((__v16hi)__X,
					     (__v16hi)__Y);
}

extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_hadd_epi32 (__m256i __X, __m256i __Y)
{
  return (__m256i) __builtin_ia32_phaddd256 ((__v8si)__X, (__v8si)__Y);
}

extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_hadds_epi16 (__m256i __X, __m256i __Y)
{
  return (__m256i) __builtin_ia32_phaddsw256 ((__v16hi)__X,
					      (__v16hi)__Y);
}

extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_hsub_epi16 (__m256i __X, __m256i __Y)
{
  return (__m256i) __builtin_ia32_phsubw256 ((__v16hi)__X,
					     (__v16hi)__Y);
}

extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_hsub_epi32 (__m256i __X, __m256i __Y)
{
  return (__m256i) __builtin_ia32_phsubd256 ((__v8si)__X, (__v8si)__Y);
}

extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_hsubs_epi16 (__m256i __X, __m256i __Y)
{
  return (__m256i) __builtin_ia32_phsubsw256 ((__v16hi)__X,
					      (__v16hi)__Y);
}

extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_maddubs_epi16 (__m256i __X, __m256i __Y)
{
  return (__m256i) __builtin_ia32_pmaddubsw256 ((__v32qi)__X,
						(__v32qi)__Y);
}

extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_madd_epi16 (__m256i __A, __m256i __B)
{
  return (__m256i)__builtin_ia32_pmaddwd256 ((__v16hi)__A,
					     (__v16hi)__B);
}

extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_max_epi8 (__m256i __A, __m256i __B)
{
  return (__m256i)__builtin_ia32_pmaxsb256 ((__v32qi)__A, (__v32qi)__B);
}

extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_max_epi16 (__m256i __A, __m256i __B)
{
  return (__m256i)__builtin_ia32_pmaxsw256 ((__v16hi)__A, (__v16hi)__B);
}

extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_max_epi32 (__m256i __A, __m256i __B)
{
  return (__m256i)__builtin_ia32_pmaxsd256 ((__v8si)__A, (__v8si)__B);
}

extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_max_epu8 (__m256i __A, __m256i __B)
{
  return (__m256i)__builtin_ia32_pmaxub256 ((__v32qi)__A, (__v32qi)__B);
}

extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_max_epu16 (__m256i __A, __m256i __B)
{
  return (__m256i)__builtin_ia32_pmaxuw256 ((__v16hi)__A, (__v16hi)__B);
}

extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_max_epu32 (__m256i __A, __m256i __B)
{
  return (__m256i)__builtin_ia32_pmaxud256 ((__v8si)__A, (__v8si)__B);
}

extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_min_epi8 (__m256i __A, __m256i __B)
{
  return (__m256i)__builtin_ia32_pminsb256 ((__v32qi)__A, (__v32qi)__B);
}

extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_min_epi16 (__m256i __A, __m256i __B)
{
  return (__m256i)__builtin_ia32_pminsw256 ((__v16hi)__A, (__v16hi)__B);
}

extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_min_epi32 (__m256i __A, __m256i __B)
{
  return (__m256i)__builtin_ia32_pminsd256 ((__v8si)__A, (__v8si)__B);
}

extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_min_epu8 (__m256i __A, __m256i __B)
{
  return (__m256i)__builtin_ia32_pminub256 ((__v32qi)__A, (__v32qi)__B);
}

extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_min_epu16 (__m256i __A, __m256i __B)
{
  return (__m256i)__builtin_ia32_pminuw256 ((__v16hi)__A, (__v16hi)__B);
}

extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_min_epu32 (__m256i __A, __m256i __B)
{
  return (__m256i)__builtin_ia32_pminud256 ((__v8si)__A, (__v8si)__B);
}

extern __inline int
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_movemask_epi8 (__m256i __A)
{
  return __builtin_ia32_pmovmskb256 ((__v32qi)__A);
}

extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_cvtepi8_epi16 (__m128i __X)
{
  return (__m256i) __builtin_ia32_pmovsxbw256 ((__v16qi)__X);
}

extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_cvtepi8_epi32 (__m128i __X)
{
  return (__m256i) __builtin_ia32_pmovsxbd256 ((__v16qi)__X);
}

extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_cvtepi8_epi64 (__m128i __X)
{
  return (__m256i) __builtin_ia32_pmovsxbq256 ((__v16qi)__X);
}

extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_cvtepi16_epi32 (__m128i __X)
{
  return (__m256i) __builtin_ia32_pmovsxwd256 ((__v8hi)__X);
}

extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_cvtepi16_epi64 (__m128i __X)
{
  return (__m256i) __builtin_ia32_pmovsxwq256 ((__v8hi)__X);
}

extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_cvtepi32_epi64 (__m128i __X)
{
  return (__m256i) __builtin_ia32_pmovsxdq256 ((__v4si)__X);
}

extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_cvtepu8_epi16 (__m128i __X)
{
  return (__m256i) __builtin_ia32_pmovzxbw256 ((__v16qi)__X);
}

extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_cvtepu8_epi32 (__m128i __X)
{
  return (__m256i) __builtin_ia32_pmovzxbd256 ((__v16qi)__X);
}

extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_cvtepu8_epi64 (__m128i __X)
{
  return (__m256i) __builtin_ia32_pmovzxbq256 ((__v16qi)__X);
}

extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_cvtepu16_epi32 (__m128i __X)
{
  return (__m256i) __builtin_ia32_pmovzxwd256 ((__v8hi)__X);
}

extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_cvtepu16_epi64 (__m128i __X)
{
  return (__m256i) __builtin_ia32_pmovzxwq256 ((__v8hi)__X);
}

extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_cvtepu32_epi64 (__m128i __X)
{
  return (__m256i) __builtin_ia32_pmovzxdq256 ((__v4si)__X);
}

extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_mul_epi32 (__m256i __X, __m256i __Y)
{
  return (__m256i) __builtin_ia32_pmuldq256 ((__v8si)__X, (__v8si)__Y);
}

extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_mulhrs_epi16 (__m256i __X, __m256i __Y)
{
  return (__m256i) __builtin_ia32_pmulhrsw256 ((__v16hi)__X,
					       (__v16hi)__Y);
}

extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_mulhi_epu16 (__m256i __A, __m256i __B)
{
  return (__m256i)__builtin_ia32_pmulhuw256 ((__v16hi)__A, (__v16hi)__B);
}

extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_mulhi_epi16 (__m256i __A, __m256i __B)
{
  return (__m256i)__builtin_ia32_pmulhw256 ((__v16hi)__A, (__v16hi)__B);
}

extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_mullo_epi16 (__m256i __A, __m256i __B)
{
  return (__m256i)__builtin_ia32_pmullw256 ((__v16hi)__A, (__v16hi)__B);
}

extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_mullo_epi32 (__m256i __A, __m256i __B)
{
  return (__m256i)__builtin_ia32_pmulld256 ((__v8si)__A, (__v8si)__B);
}

extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_mul_epu32 (__m256i __A, __m256i __B)
{
  return (__m256i)__builtin_ia32_pmuludq256 ((__v8si)__A, (__v8si)__B);
}

extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_or_si256 (__m256i __A, __m256i __B)
{
  return (__m256i)__builtin_ia32_por256 ((__v4di)__A, (__v4di)__B);
}

extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_sad_epu8 (__m256i __A, __m256i __B)
{
  return (__m256i)__builtin_ia32_psadbw256 ((__v32qi)__A, (__v32qi)__B);
}

extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_shuffle_epi8 (__m256i __X, __m256i __Y)
{
  return (__m256i) __builtin_ia32_pshufb256 ((__v32qi)__X,
					     (__v32qi)__Y);
}

#ifdef __OPTIMIZE__
extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_shuffle_epi32 (__m256i __A, const int __mask)
{
  return (__m256i)__builtin_ia32_pshufd256 ((__v8si)__A, __mask);
}

extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_shufflehi_epi16 (__m256i __A, const int __mask)
{
  return (__m256i)__builtin_ia32_pshufhw256 ((__v16hi)__A, __mask);
}

extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_shufflelo_epi16 (__m256i __A, const int __mask)
{
  return (__m256i)__builtin_ia32_pshuflw256 ((__v16hi)__A, __mask);
}
#else
#define _mm256_shuffle_epi32(A, N) \
  ((__m256i)__builtin_ia32_pshufd256 ((__v8si)(__m256i)(A), (int)(N)))
#define _mm256_shufflehi_epi16(A, N) \
  ((__m256i)__builtin_ia32_pshufhw256 ((__v16hi)(__m256i)(A), (int)(N)))
#define _mm256_shufflelo_epi16(A, N) \
  ((__m256i)__builtin_ia32_pshuflw256 ((__v16hi)(__m256i)(A), (int)(N)))
#endif

extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_sign_epi8 (__m256i __X, __m256i __Y)
{
  return (__m256i) __builtin_ia32_psignb256 ((__v32qi)__X, (__v32qi)__Y);
}

extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_sign_epi16 (__m256i __X, __m256i __Y)
{
  return (__m256i) __builtin_ia32_psignw256 ((__v16hi)__X, (__v16hi)__Y);
}

extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_sign_epi32 (__m256i __X, __m256i __Y)
{
  return (__m256i) __builtin_ia32_psignd256 ((__v8si)__X, (__v8si)__Y);
}

#ifdef __OPTIMIZE__
extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_bslli_epi128 (__m256i __A, const int __N)
{
  return (__m256i)__builtin_ia32_pslldqi256 (__A, __N * 8);
}

extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_slli_si256 (__m256i __A, const int __N)
{
  return (__m256i)__builtin_ia32_pslldqi256 (__A, __N * 8);
}
#else
#define _mm256_bslli_epi128(A, N) \
  ((__m256i)__builtin_ia32_pslldqi256 ((__m256i)(A), (int)(N) * 8))
#define _mm256_slli_si256(A, N) \
  ((__m256i)__builtin_ia32_pslldqi256 ((__m256i)(A), (int)(N) * 8))
#endif

extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_slli_epi16 (__m256i __A, int __B)
{
  return (__m256i)__builtin_ia32_psllwi256 ((__v16hi)__A, __B);
}

extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_sll_epi16 (__m256i __A, __m128i __B)
{
  return (__m256i)__builtin_ia32_psllw256((__v16hi)__A, (__v8hi)__B);
}

extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_slli_epi32 (__m256i __A, int __B)
{
  return (__m256i)__builtin_ia32_pslldi256 ((__v8si)__A, __B);
}

extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_sll_epi32 (__m256i __A, __m128i __B)
{
  return (__m256i)__builtin_ia32_pslld256((__v8si)__A, (__v4si)__B);
}

extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_slli_epi64 (__m256i __A, int __B)
{
  return (__m256i)__builtin_ia32_psllqi256 ((__v4di)__A, __B);
}

extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_sll_epi64 (__m256i __A, __m128i __B)
{
  return (__m256i)__builtin_ia32_psllq256((__v4di)__A, (__v2di)__B);
}

extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_srai_epi16 (__m256i __A, int __B)
{
  return (__m256i)__builtin_ia32_psrawi256 ((__v16hi)__A, __B);
}

extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_sra_epi16 (__m256i __A, __m128i __B)
{
  return (__m256i)__builtin_ia32_psraw256 ((__v16hi)__A, (__v8hi)__B);
}

extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_srai_epi32 (__m256i __A, int __B)
{
  return (__m256i)__builtin_ia32_psradi256 ((__v8si)__A, __B);
}

extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_sra_epi32 (__m256i __A, __m128i __B)
{
  return (__m256i)__builtin_ia32_psrad256 ((__v8si)__A, (__v4si)__B);
}

#ifdef __OPTIMIZE__
extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_bsrli_epi128 (__m256i __A, const int __N)
{
  return (__m256i)__builtin_ia32_psrldqi256 (__A, __N * 8);
}

extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_srli_si256 (__m256i __A, const int __N)
{
  return (__m256i)__builtin_ia32_psrldqi256 (__A, __N * 8);
}
#else
#define _mm256_bsrli_epi128(A, N) \
  ((__m256i)__builtin_ia32_psrldqi256 ((__m256i)(A), (int)(N) * 8))
#define _mm256_srli_si256(A, N) \
  ((__m256i)__builtin_ia32_psrldqi256 ((__m256i)(A), (int)(N) * 8))
#endif

extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_srli_epi16 (__m256i __A, int __B)
{
  return (__m256i)__builtin_ia32_psrlwi256 ((__v16hi)__A, __B);
}

extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_srl_epi16 (__m256i __A, __m128i __B)
{
  return (__m256i)__builtin_ia32_psrlw256((__v16hi)__A, (__v8hi)__B);
}

extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_srli_epi32 (__m256i __A, int __B)
{
  return (__m256i)__builtin_ia32_psrldi256 ((__v8si)__A, __B);
}

extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_srl_epi32 (__m256i __A, __m128i __B)
{
  return (__m256i)__builtin_ia32_psrld256((__v8si)__A, (__v4si)__B);
}

extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_srli_epi64 (__m256i __A, int __B)
{
  return (__m256i)__builtin_ia32_psrlqi256 ((__v4di)__A, __B);
}

extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_srl_epi64 (__m256i __A, __m128i __B)
{
  return (__m256i)__builtin_ia32_psrlq256((__v4di)__A, (__v2di)__B);
}

extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_sub_epi8 (__m256i __A, __m256i __B)
{
  return (__m256i)__builtin_ia32_psubb256 ((__v32qi)__A, (__v32qi)__B);
}

extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_sub_epi16 (__m256i __A, __m256i __B)
{
  return (__m256i)__builtin_ia32_psubw256 ((__v16hi)__A, (__v16hi)__B);
}

extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_sub_epi32 (__m256i __A, __m256i __B)
{
  return (__m256i)__builtin_ia32_psubd256 ((__v8si)__A, (__v8si)__B);
}

extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_sub_epi64 (__m256i __A, __m256i __B)
{
  return (__m256i)__builtin_ia32_psubq256 ((__v4di)__A, (__v4di)__B);
}

extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_subs_epi8 (__m256i __A, __m256i __B)
{
  return (__m256i)__builtin_ia32_psubsb256 ((__v32qi)__A, (__v32qi)__B);
}

extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_subs_epi16 (__m256i __A, __m256i __B)
{
  return (__m256i)__builtin_ia32_psubsw256 ((__v16hi)__A, (__v16hi)__B);
}

extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_subs_epu8 (__m256i __A, __m256i __B)
{
  return (__m256i)__builtin_ia32_psubusb256 ((__v32qi)__A, (__v32qi)__B);
}

extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_subs_epu16 (__m256i __A, __m256i __B)
{
  return (__m256i)__builtin_ia32_psubusw256 ((__v16hi)__A, (__v16hi)__B);
}

extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_unpackhi_epi8 (__m256i __A, __m256i __B)
{
  return (__m256i)__builtin_ia32_punpckhbw256 ((__v32qi)__A, (__v32qi)__B);
}

extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_unpackhi_epi16 (__m256i __A, __m256i __B)
{
  return (__m256i)__builtin_ia32_punpckhwd256 ((__v16hi)__A, (__v16hi)__B);
}

extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_unpackhi_epi32 (__m256i __A, __m256i __B)
{
  return (__m256i)__builtin_ia32_punpckhdq256 ((__v8si)__A, (__v8si)__B);
}

extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_unpackhi_epi64 (__m256i __A, __m256i __B)
{
  return (__m256i)__builtin_ia32_punpckhqdq256 ((__v4di)__A, (__v4di)__B);
}

extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_unpacklo_epi8 (__m256i __A, __m256i __B)
{
  return (__m256i)__builtin_ia32_punpcklbw256 ((__v32qi)__A, (__v32qi)__B);
}

extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_unpacklo_epi16 (__m256i __A, __m256i __B)
{
  return (__m256i)__builtin_ia32_punpcklwd256 ((__v16hi)__A, (__v16hi)__B);
}

extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_unpacklo_epi32 (__m256i __A, __m256i __B)
{
  return (__m256i)__builtin_ia32_punpckldq256 ((__v8si)__A, (__v8si)__B);
}

extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_unpacklo_epi64 (__m256i __A, __m256i __B)
{
  return (__m256i)__builtin_ia32_punpcklqdq256 ((__v4di)__A, (__v4di)__B);
}

extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_xor_si256 (__m256i __A, __m256i __B)
{
  return (__m256i)__builtin_ia32_pxor256 ((__v4di)__A, (__v4di)__B);
}

extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_stream_load_si256 (__m256i const *__X)
{
  return (__m256i) __builtin_ia32_movntdqa256 ((__v4di *) __X);
}

extern __inline __m128
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_broadcastss_ps (__m128 __X)
{
  return (__m128) __builtin_ia32_vbroadcastss_ps ((__v4sf)__X);
}

extern __inline __m256
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_broadcastss_ps (__m128 __X)
{
  return (__m256) __builtin_ia32_vbroadcastss_ps256 ((__v4sf)__X);
}

extern __inline __m256d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_broadcastsd_pd (__m128d __X)
{
  return (__m256d) __builtin_ia32_vbroadcastsd_pd256 ((__v2df)__X);
}

extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_broadcastsi128_si256 (__m128i __X)
{
  return (__m256i) __builtin_ia32_vbroadcastsi256 ((__v2di)__X);
}

#ifdef __OPTIMIZE__
extern __inline __m128i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_blend_epi32 (__m128i __X, __m128i __Y, const int __M)
{
  return (__m128i) __builtin_ia32_pblendd128 ((__v4si)__X,
					      (__v4si)__Y,
					      __M);
}
#else
#define _mm_blend_epi32(X, Y, M)					\
  ((__m128i) __builtin_ia32_pblendd128 ((__v4si)(__m128i)(X),		\
					(__v4si)(__m128i)(Y), (int)(M)))
#endif

#ifdef __OPTIMIZE__
extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_blend_epi32 (__m256i __X, __m256i __Y, const int __M)
{
  return (__m256i) __builtin_ia32_pblendd256 ((__v8si)__X,
					      (__v8si)__Y,
					      __M);
}
#else
#define _mm256_blend_epi32(X, Y, M)					\
  ((__m256i) __builtin_ia32_pblendd256 ((__v8si)(__m256i)(X),		\
					(__v8si)(__m256i)(Y), (int)(M)))
#endif

extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_broadcastb_epi8 (__m128i __X)
{
  return (__m256i) __builtin_ia32_pbroadcastb256 ((__v16qi)__X);
}

extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_broadcastw_epi16 (__m128i __X)
{
  return (__m256i) __builtin_ia32_pbroadcastw256 ((__v8hi)__X);
}

extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_broadcastd_epi32 (__m128i __X)
{
  return (__m256i) __builtin_ia32_pbroadcastd256 ((__v4si)__X);
}

extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_broadcastq_epi64 (__m128i __X)
{
  return (__m256i) __builtin_ia32_pbroadcastq256 ((__v2di)__X);
}

extern __inline __m128i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_broadcastb_epi8 (__m128i __X)
{
  return (__m128i) __builtin_ia32_pbroadcastb128 ((__v16qi)__X);
}

extern __inline __m128i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_broadcastw_epi16 (__m128i __X)
{
  return (__m128i) __builtin_ia32_pbroadcastw128 ((__v8hi)__X);
}

extern __inline __m128i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_broadcastd_epi32 (__m128i __X)
{
  return (__m128i) __builtin_ia32_pbroadcastd128 ((__v4si)__X);
}

extern __inline __m128i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_broadcastq_epi64 (__m128i __X)
{
  return (__m128i) __builtin_ia32_pbroadcastq128 ((__v2di)__X);
}

extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_permutevar8x32_epi32 (__m256i __X, __m256i __Y)
{
  return (__m256i) __builtin_ia32_permvarsi256 ((__v8si)__X, (__v8si)__Y);
}

#ifdef __OPTIMIZE__
extern __inline __m256d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_permute4x64_pd (__m256d __X, const int __M)
{
  return (__m256d) __builtin_ia32_permdf256 ((__v4df)__X, __M);
}
#else
#define _mm256_permute4x64_pd(X, M)			       \
  ((__m256d) __builtin_ia32_permdf256 ((__v4df)(__m256d)(X), (int)(M)))
#endif

extern __inline __m256
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_permutevar8x32_ps (__m256 __X, __m256i __Y)
{
  return (__m256) __builtin_ia32_permvarsf256 ((__v8sf)__X, (__v8si)__Y);
}

#ifdef __OPTIMIZE__
extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_permute4x64_epi64 (__m256i __X, const int __M)
{
  return (__m256i) __builtin_ia32_permdi256 ((__v4di)__X, __M);
}
#else
#define _mm256_permute4x64_epi64(X, M)			       \
  ((__m256i) __builtin_ia32_permdi256 ((__v4di)(__m256i)(X), (int)(M)))
#endif


#ifdef __OPTIMIZE__
extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_permute2x128_si256 (__m256i __X, __m256i __Y, const int __M)
{
  return (__m256i) __builtin_ia32_permti256 ((__v4di)__X, (__v4di)__Y, __M);
}
#else
#define _mm256_permute2x128_si256(X, Y, M)				\
  ((__m256i) __builtin_ia32_permti256 ((__v4di)(__m256i)(X), (__v4di)(__m256i)(Y), (int)(M)))
#endif

#ifdef __OPTIMIZE__
extern __inline __m128i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_extracti128_si256 (__m256i __X, const int __M)
{
  return (__m128i) __builtin_ia32_extract128i256 ((__v4di)__X, __M);
}
#else
#define _mm256_extracti128_si256(X, M)				\
  ((__m128i) __builtin_ia32_extract128i256 ((__v4di)(__m256i)(X), (int)(M)))
#endif

#ifdef __OPTIMIZE__
extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_inserti128_si256 (__m256i __X, __m128i __Y, const int __M)
{
  return (__m256i) __builtin_ia32_insert128i256 ((__v4di)__X, (__v2di)__Y, __M);
}
#else
#define _mm256_inserti128_si256(X, Y, M)			 \
  ((__m256i) __builtin_ia32_insert128i256 ((__v4di)(__m256i)(X), \
					   (__v2di)(__m128i)(Y), \
					   (int)(M)))
#endif

extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_maskload_epi32 (int const *__X, __m256i __M )
{
  return (__m256i) __builtin_ia32_maskloadd256 ((const __v8si *)__X,
						(__v8si)__M);
}

extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_maskload_epi64 (long long const *__X, __m256i __M )
{
  return (__m256i) __builtin_ia32_maskloadq256 ((const __v4di *)__X,
						(__v4di)__M);
}

extern __inline __m128i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_maskload_epi32 (int const *__X, __m128i __M )
{
  return (__m128i) __builtin_ia32_maskloadd ((const __v4si *)__X,
					     (__v4si)__M);
}

extern __inline __m128i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_maskload_epi64 (long long const *__X, __m128i __M )
{
  return (__m128i) __builtin_ia32_maskloadq ((const __v2di *)__X,
					     (__v2di)__M);
}

extern __inline void
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_maskstore_epi32 (int *__X, __m256i __M, __m256i __Y )
{
  __builtin_ia32_maskstored256 ((__v8si *)__X, (__v8si)__M, (__v8si)__Y);
}

extern __inline void
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_maskstore_epi64 (long long *__X, __m256i __M, __m256i __Y )
{
  __builtin_ia32_maskstoreq256 ((__v4di *)__X, (__v4di)__M, (__v4di)__Y);
}

extern __inline void
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_maskstore_epi32 (int *__X, __m128i __M, __m128i __Y )
{
  __builtin_ia32_maskstored ((__v4si *)__X, (__v4si)__M, (__v4si)__Y);
}

extern __inline void
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_maskstore_epi64 (long long *__X, __m128i __M, __m128i __Y )
{
  __builtin_ia32_maskstoreq (( __v2di *)__X, (__v2di)__M, (__v2di)__Y);
}

extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_sllv_epi32 (__m256i __X, __m256i __Y)
{
  return (__m256i) __builtin_ia32_psllv8si ((__v8si)__X, (__v8si)__Y);
}

extern __inline __m128i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_sllv_epi32 (__m128i __X, __m128i __Y)
{
  return (__m128i) __builtin_ia32_psllv4si ((__v4si)__X, (__v4si)__Y);
}

extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_sllv_epi64 (__m256i __X, __m256i __Y)
{
  return (__m256i) __builtin_ia32_psllv4di ((__v4di)__X, (__v4di)__Y);
}

extern __inline __m128i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_sllv_epi64 (__m128i __X, __m128i __Y)
{
  return (__m128i) __builtin_ia32_psllv2di ((__v2di)__X, (__v2di)__Y);
}

extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_srav_epi32 (__m256i __X, __m256i __Y)
{
  return (__m256i) __builtin_ia32_psrav8si ((__v8si)__X, (__v8si)__Y);
}

extern __inline __m128i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_srav_epi32 (__m128i __X, __m128i __Y)
{
  return (__m128i) __builtin_ia32_psrav4si ((__v4si)__X, (__v4si)__Y);
}

extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_srlv_epi32 (__m256i __X, __m256i __Y)
{
  return (__m256i) __builtin_ia32_psrlv8si ((__v8si)__X, (__v8si)__Y);
}

extern __inline __m128i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_srlv_epi32 (__m128i __X, __m128i __Y)
{
  return (__m128i) __builtin_ia32_psrlv4si ((__v4si)__X, (__v4si)__Y);
}

extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_srlv_epi64 (__m256i __X, __m256i __Y)
{
  return (__m256i) __builtin_ia32_psrlv4di ((__v4di)__X, (__v4di)__Y);
}

extern __inline __m128i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_srlv_epi64 (__m128i __X, __m128i __Y)
{
  return (__m128i) __builtin_ia32_psrlv2di ((__v2di)__X, (__v2di)__Y);
}

#ifdef __OPTIMIZE__
extern __inline __m128d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_i32gather_pd (double const *base, __m128i index, const int scale)
{
  __v2df src = _mm_setzero_pd ();
  __v2df mask = _mm_cmpeq_pd (src, src);

  return (__m128d) __builtin_ia32_gathersiv2df (src,
						base,
						(__v4si)index,
						mask,
						scale);
}

extern __inline __m128d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_i32gather_pd (__m128d src, double const *base, __m128i index,
		       __m128d mask, const int scale)
{
  return (__m128d) __builtin_ia32_gathersiv2df ((__v2df)src,
						base,
						(__v4si)index,
						(__v2df)mask,
						scale);
}

extern __inline __m256d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_i32gather_pd (double const *base, __m128i index, const int scale)
{
  __v4df src = _mm256_setzero_pd ();
  __v4df mask = _mm256_cmp_pd (src, src, _CMP_EQ_OQ);

  return (__m256d) __builtin_ia32_gathersiv4df (src,
						base,
						(__v4si)index,
						mask,
						scale);
}

extern __inline __m256d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_mask_i32gather_pd (__m256d src, double const *base,
			  __m128i index, __m256d mask, const int scale)
{
  return (__m256d) __builtin_ia32_gathersiv4df ((__v4df)src,
						base,
						(__v4si)index,
						(__v4df)mask,
						scale);
}

extern __inline __m128d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_i64gather_pd (double const *base, __m128i index, const int scale)
{
  __v2df src = _mm_setzero_pd ();
  __v2df mask = _mm_cmpeq_pd (src, src);

  return (__m128d) __builtin_ia32_gatherdiv2df (src,
						base,
						(__v2di)index,
						mask,
						scale);
}

extern __inline __m128d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_i64gather_pd (__m128d src, double const *base, __m128i index,
		       __m128d mask, const int scale)
{
  return (__m128d) __builtin_ia32_gatherdiv2df ((__v2df)src,
						base,
						(__v2di)index,
						(__v2df)mask,
						scale);
}

extern __inline __m256d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_i64gather_pd (double const *base, __m256i index, const int scale)
{
  __v4df src = _mm256_setzero_pd ();
  __v4df mask = _mm256_cmp_pd (src, src, _CMP_EQ_OQ);

  return (__m256d) __builtin_ia32_gatherdiv4df (src,
						base,
						(__v4di)index,
						mask,
						scale);
}

extern __inline __m256d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_mask_i64gather_pd (__m256d src, double const *base,
			  __m256i index, __m256d mask, const int scale)
{
  return (__m256d) __builtin_ia32_gatherdiv4df ((__v4df)src,
						base,
						(__v4di)index,
						(__v4df)mask,
						scale);
}

extern __inline __m128
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_i32gather_ps (float const *base, __m128i index, const int scale)
{
  __v4sf src = _mm_setzero_ps ();
  __v4sf mask = _mm_cmpeq_ps (src, src);

  return (__m128) __builtin_ia32_gathersiv4sf (src,
					       base,
					       (__v4si)index,
					       mask,
					       scale);
}

extern __inline __m128
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_i32gather_ps (__m128 src, float const *base, __m128i index,
		       __m128 mask, const int scale)
{
  return (__m128) __builtin_ia32_gathersiv4sf ((__v4sf)src,
					       base,
					       (__v4si)index,
					       (__v4sf)mask,
					       scale);
}

extern __inline __m256
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_i32gather_ps (float const *base, __m256i index, const int scale)
{
  __v8sf src = _mm256_setzero_ps ();
  __v8sf mask = _mm256_cmp_ps (src, src, _CMP_EQ_OQ);

  return (__m256) __builtin_ia32_gathersiv8sf (src,
					       base,
					       (__v8si)index,
					       mask,
					       scale);
}

extern __inline __m256
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_mask_i32gather_ps (__m256 src, float const *base,
			  __m256i index, __m256 mask, const int scale)
{
  return (__m256) __builtin_ia32_gathersiv8sf ((__v8sf)src,
					       base,
					       (__v8si)index,
					       (__v8sf)mask,
					       scale);
}

extern __inline __m128
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_i64gather_ps (float const *base, __m128i index, const int scale)
{
  __v4sf src = _mm_setzero_ps ();
  __v4sf mask = _mm_cmpeq_ps (src, src);

  return (__m128) __builtin_ia32_gatherdiv4sf (src,
					       base,
					       (__v2di)index,
					       mask,
					       scale);
}

extern __inline __m128
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_i64gather_ps (__m128 src, float const *base, __m128i index,
		       __m128 mask, const int scale)
{
  return (__m128) __builtin_ia32_gatherdiv4sf ((__v4sf)src,
						base,
						(__v2di)index,
						(__v4sf)mask,
						scale);
}

extern __inline __m128
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_i64gather_ps (float const *base, __m256i index, const int scale)
{
  __v4sf src = _mm_setzero_ps ();
  __v4sf mask = _mm_cmpeq_ps (src, src);

  return (__m128) __builtin_ia32_gatherdiv4sf256 (src,
						  base,
						  (__v4di)index,
						  mask,
						  scale);
}

extern __inline __m128
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_mask_i64gather_ps (__m128 src, float const *base,
			  __m256i index, __m128 mask, const int scale)
{
  return (__m128) __builtin_ia32_gatherdiv4sf256 ((__v4sf)src,
						  base,
						  (__v4di)index,
						  (__v4sf)mask,
						  scale);
}

extern __inline __m128i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_i32gather_epi64 (long long int const *base,
		     __m128i index, const int scale)
{
  __v2di src = __extension__ (__v2di){ 0, 0 };
  __v2di mask = __extension__ (__v2di){ ~0, ~0 };

  return (__m128i) __builtin_ia32_gathersiv2di (src,
						base,
						(__v4si)index,
						mask,
						scale);
}

extern __inline __m128i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_i32gather_epi64 (__m128i src, long long int const *base,
			  __m128i index, __m128i mask, const int scale)
{
  return (__m128i) __builtin_ia32_gathersiv2di ((__v2di)src,
						base,
						(__v4si)index,
						(__v2di)mask,
						scale);
}

extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_i32gather_epi64 (long long int const *base,
			__m128i index, const int scale)
{
  __v4di src = __extension__ (__v4di){ 0, 0, 0, 0 };
  __v4di mask = __extension__ (__v4di){ ~0, ~0, ~0, ~0 };

  return (__m256i) __builtin_ia32_gathersiv4di (src,
						base,
						(__v4si)index,
						mask,
						scale);
}

extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_mask_i32gather_epi64 (__m256i src, long long int const *base,
			     __m128i index, __m256i mask, const int scale)
{
  return (__m256i) __builtin_ia32_gathersiv4di ((__v4di)src,
						base,
						(__v4si)index,
						(__v4di)mask,
						scale);
}

extern __inline __m128i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_i64gather_epi64 (long long int const *base,
		     __m128i index, const int scale)
{
  __v2di src = __extension__ (__v2di){ 0, 0 };
  __v2di mask = __extension__ (__v2di){ ~0, ~0 };

  return (__m128i) __builtin_ia32_gatherdiv2di (src,
						base,
						(__v2di)index,
						mask,
						scale);
}

extern __inline __m128i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_i64gather_epi64 (__m128i src, long long int const *base, __m128i index,
			  __m128i mask, const int scale)
{
  return (__m128i) __builtin_ia32_gatherdiv2di ((__v2di)src,
						base,
						(__v2di)index,
						(__v2di)mask,
						scale);
}

extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_i64gather_epi64 (long long int const *base,
			__m256i index, const int scale)
{
  __v4di src = __extension__ (__v4di){ 0, 0, 0, 0 };
  __v4di mask = __extension__ (__v4di){ ~0, ~0, ~0, ~0 };

  return (__m256i) __builtin_ia32_gatherdiv4di (src,
						base,
						(__v4di)index,
						mask,
						scale);
}

extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_mask_i64gather_epi64 (__m256i src, long long int const *base,
			     __m256i index, __m256i mask, const int scale)
{
  return (__m256i) __builtin_ia32_gatherdiv4di ((__v4di)src,
						base,
						(__v4di)index,
						(__v4di)mask,
						scale);
}

extern __inline __m128i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_i32gather_epi32 (int const *base, __m128i index, const int scale)
{
  __v4si src = __extension__ (__v4si){ 0, 0, 0, 0 };
  __v4si mask = __extension__ (__v4si){ ~0, ~0, ~0, ~0 };

  return (__m128i) __builtin_ia32_gathersiv4si (src,
					       base,
					       (__v4si)index,
					       mask,
					       scale);
}

extern __inline __m128i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_i32gather_epi32 (__m128i src, int const *base, __m128i index,
			  __m128i mask, const int scale)
{
  return (__m128i) __builtin_ia32_gathersiv4si ((__v4si)src,
						base,
						(__v4si)index,
						(__v4si)mask,
						scale);
}

extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_i32gather_epi32 (int const *base, __m256i index, const int scale)
{
  __v8si src = __extension__ (__v8si){ 0, 0, 0, 0, 0, 0, 0, 0 };
  __v8si mask = __extension__ (__v8si){ ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0 };

  return (__m256i) __builtin_ia32_gathersiv8si (src,
						base,
						(__v8si)index,
						mask,
						scale);
}

extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_mask_i32gather_epi32 (__m256i src, int const *base,
			     __m256i index, __m256i mask, const int scale)
{
  return (__m256i) __builtin_ia32_gathersiv8si ((__v8si)src,
						base,
						(__v8si)index,
						(__v8si)mask,
						scale);
}

extern __inline __m128i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_i64gather_epi32 (int const *base, __m128i index, const int scale)
{
  __v4si src = __extension__ (__v4si){ 0, 0, 0, 0 };
  __v4si mask = __extension__ (__v4si){ ~0, ~0, ~0, ~0 };

  return (__m128i) __builtin_ia32_gatherdiv4si (src,
						base,
						(__v2di)index,
						mask,
						scale);
}

extern __inline __m128i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_i64gather_epi32 (__m128i src, int const *base, __m128i index,
			  __m128i mask, const int scale)
{
  return (__m128i) __builtin_ia32_gatherdiv4si ((__v4si)src,
						base,
						(__v2di)index,
						(__v4si)mask,
						scale);
}

extern __inline __m128i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_i64gather_epi32 (int const *base, __m256i index, const int scale)
{
  __v4si src = __extension__ (__v4si){ 0, 0, 0, 0 };
  __v4si mask = __extension__ (__v4si){ ~0, ~0, ~0, ~0 };

  return (__m128i) __builtin_ia32_gatherdiv4si256 (src,
						  base,
						  (__v4di)index,
						  mask,
						  scale);
}

extern __inline __m128i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_mask_i64gather_epi32 (__m128i src, int const *base,
			     __m256i index, __m128i mask, const int scale)
{
  return (__m128i) __builtin_ia32_gatherdiv4si256 ((__v4si)src,
						   base,
						   (__v4di)index,
						   (__v4si)mask,
						   scale);
}
#else /* __OPTIMIZE__ */
#define _mm_i32gather_pd(BASE, INDEX, SCALE)				\
  (__m128d) __builtin_ia32_gathersiv2df ((__v2df) _mm_setzero_pd (),	\
					 (double const *)BASE,		\
					 (__v4si)(__m128i)INDEX,	\
					 (__v2df)_mm_set1_pd(		\
					   (double)(long long int) -1), \
					 (int)SCALE)

#define _mm_mask_i32gather_pd(SRC, BASE, INDEX, MASK, SCALE)	 \
  (__m128d) __builtin_ia32_gathersiv2df ((__v2df)(__m128d)SRC,	 \
					 (double const *)BASE,	 \
					 (__v4si)(__m128i)INDEX, \
					 (__v2df)(__m128d)MASK,	 \
					 (int)SCALE)

#define _mm256_i32gather_pd(BASE, INDEX, SCALE)				\
  (__m256d) __builtin_ia32_gathersiv4df ((__v4df) _mm256_setzero_pd (),	\
					 (double const *)BASE,		\
					 (__v4si)(__m128i)INDEX,	\
					 (__v4df)_mm256_set1_pd(	\
					   (double)(long long int) -1), \
					 (int)SCALE)

#define _mm256_mask_i32gather_pd(SRC, BASE, INDEX, MASK, SCALE)	 \
  (__m256d) __builtin_ia32_gathersiv4df ((__v4df)(__m256d)SRC,	 \
					 (double const *)BASE,	 \
					 (__v4si)(__m128i)INDEX, \
					 (__v4df)(__m256d)MASK,	 \
					 (int)SCALE)

#define _mm_i64gather_pd(BASE, INDEX, SCALE)				\
  (__m128d) __builtin_ia32_gatherdiv2df ((__v2df) _mm_setzero_pd (),	\
					 (double const *)BASE,		\
					 (__v2di)(__m128i)INDEX,	\
					 (__v2df)_mm_set1_pd(		\
					   (double)(long long int) -1), \
					 (int)SCALE)

#define _mm_mask_i64gather_pd(SRC, BASE, INDEX, MASK, SCALE)	 \
  (__m128d) __builtin_ia32_gatherdiv2df ((__v2df)(__m128d)SRC,	 \
					 (double const *)BASE,	 \
					 (__v2di)(__m128i)INDEX, \
					 (__v2df)(__m128d)MASK,	 \
					 (int)SCALE)

#define _mm256_i64gather_pd(BASE, INDEX, SCALE)				\
  (__m256d) __builtin_ia32_gatherdiv4df ((__v4df) _mm256_setzero_pd (),	\
					 (double const *)BASE,		\
					 (__v4di)(__m256i)INDEX,	\
					 (__v4df)_mm256_set1_pd(	\
					   (double)(long long int) -1), \
					 (int)SCALE)

#define _mm256_mask_i64gather_pd(SRC, BASE, INDEX, MASK, SCALE)	 \
  (__m256d) __builtin_ia32_gatherdiv4df ((__v4df)(__m256d)SRC,	 \
					 (double const *)BASE,	 \
					 (__v4di)(__m256i)INDEX, \
					 (__v4df)(__m256d)MASK,	 \
					 (int)SCALE)

#define _mm_i32gather_ps(BASE, INDEX, SCALE)				\
  (__m128) __builtin_ia32_gathersiv4sf ((__v4sf) _mm_setzero_ps (),	\
					(float const *)BASE,		\
					(__v4si)(__m128i)INDEX,		\
					_mm_set1_ps ((float)(int) -1),	\
					(int)SCALE)

#define _mm_mask_i32gather_ps(SRC, BASE, INDEX, MASK, SCALE)	 \
  (__m128) __builtin_ia32_gathersiv4sf ((__v4sf)(__m128d)SRC,	 \
					(float const *)BASE,	 \
					(__v4si)(__m128i)INDEX,	 \
					(__v4sf)(__m128d)MASK,	 \
					(int)SCALE)

#define _mm256_i32gather_ps(BASE, INDEX, SCALE)			       \
  (__m256) __builtin_ia32_gathersiv8sf ((__v8sf) _mm256_setzero_ps (), \
					(float const *)BASE,	       \
					(__v8si)(__m256i)INDEX,	       \
					(__v8sf)_mm256_set1_ps (       \
					  (float)(int) -1),	       \
					(int)SCALE)

#define _mm256_mask_i32gather_ps(SRC, BASE, INDEX, MASK, SCALE) \
  (__m256) __builtin_ia32_gathersiv8sf ((__v8sf)(__m256)SRC,	\
					(float const *)BASE,	\
					(__v8si)(__m256i)INDEX, \
					(__v8sf)(__m256d)MASK,	\
					(int)SCALE)

#define _mm_i64gather_ps(BASE, INDEX, SCALE)				\
  (__m128) __builtin_ia32_gatherdiv4sf ((__v4sf) _mm_setzero_pd (),	\
					(float const *)BASE,		\
					(__v2di)(__m128i)INDEX,		\
					(__v4sf)_mm_set1_ps (		\
					  (float)(int) -1),		\
					(int)SCALE)

#define _mm_mask_i64gather_ps(SRC, BASE, INDEX, MASK, SCALE)	 \
  (__m128) __builtin_ia32_gatherdiv4sf ((__v4sf)(__m128)SRC,	 \
					(float const *)BASE,	 \
					(__v2di)(__m128i)INDEX,	 \
					(__v4sf)(__m128d)MASK,	 \
					(int)SCALE)

#define _mm256_i64gather_ps(BASE, INDEX, SCALE)				\
  (__m128) __builtin_ia32_gatherdiv4sf256 ((__v4sf) _mm_setzero_ps (),	\
					   (float const *)BASE,		\
					   (__v4di)(__m256i)INDEX,	\
					   (__v4sf)_mm_set1_ps(		\
					     (float)(int) -1),		\
					   (int)SCALE)

#define _mm256_mask_i64gather_ps(SRC, BASE, INDEX, MASK, SCALE)	   \
  (__m128) __builtin_ia32_gatherdiv4sf256 ((__v4sf)(__m128)SRC,	   \
					   (float const *)BASE,	   \
					   (__v4di)(__m256i)INDEX, \
					   (__v4sf)(__m128)MASK,   \
					   (int)SCALE)

#define _mm_i32gather_epi64(BASE, INDEX, SCALE)				\
  (__m128i) __builtin_ia32_gathersiv2di ((__v2di) _mm_setzero_si128 (), \
					 (long long const *)BASE,	\
					 (__v4si)(__m128i)INDEX,	\
					 (__v2di)_mm_set1_epi64x (-1),	\
					 (int)SCALE)

#define _mm_mask_i32gather_epi64(SRC, BASE, INDEX, MASK, SCALE)	  \
  (__m128i) __builtin_ia32_gathersiv2di ((__v2di)(__m128i)SRC,	  \
					 (long long const *)BASE, \
					 (__v4si)(__m128i)INDEX,  \
					 (__v2di)(__m128i)MASK,	  \
					 (int)SCALE)

#define _mm256_i32gather_epi64(BASE, INDEX, SCALE)			   \
  (__m256i) __builtin_ia32_gathersiv4di ((__v4di) _mm256_setzero_si256 (), \
					 (long long const *)BASE,	   \
					 (__v4si)(__m128i)INDEX,	   \
					 (__v4di)_mm256_set1_epi64x (-1),  \
					 (int)SCALE)

#define _mm256_mask_i32gather_epi64(SRC, BASE, INDEX, MASK, SCALE) \
  (__m256i) __builtin_ia32_gathersiv4di ((__v4di)(__m256i)SRC,	   \
					 (long long const *)BASE,  \
					 (__v4si)(__m128i)INDEX,   \
					 (__v4di)(__m256i)MASK,	   \
					 (int)SCALE)

#define _mm_i64gather_epi64(BASE, INDEX, SCALE)				\
  (__m128i) __builtin_ia32_gatherdiv2di ((__v2di) _mm_setzero_si128 (), \
					 (long long const *)BASE,	\
					 (__v2di)(__m128i)INDEX,	\
					 (__v2di)_mm_set1_epi64x (-1),	\
					 (int)SCALE)

#define _mm_mask_i64gather_epi64(SRC, BASE, INDEX, MASK, SCALE)	  \
  (__m128i) __builtin_ia32_gatherdiv2di ((__v2di)(__m128i)SRC,	  \
					 (long long const *)BASE, \
					 (__v2di)(__m128i)INDEX,  \
					 (__v2di)(__m128i)MASK,	  \
					 (int)SCALE)

#define _mm256_i64gather_epi64(BASE, INDEX, SCALE)			   \
  (__m256i) __builtin_ia32_gatherdiv4di ((__v4di) _mm256_setzero_si256 (), \
					 (long long const *)BASE,	   \
					 (__v4di)(__m256i)INDEX,	   \
					 (__v4di)_mm256_set1_epi64x (-1),  \
					 (int)SCALE)

#define _mm256_mask_i64gather_epi64(SRC, BASE, INDEX, MASK, SCALE) \
  (__m256i) __builtin_ia32_gatherdiv4di ((__v4di)(__m256i)SRC,	   \
					 (long long const *)BASE,  \
					 (__v4di)(__m256i)INDEX,   \
					 (__v4di)(__m256i)MASK,	   \
					 (int)SCALE)

#define _mm_i32gather_epi32(BASE, INDEX, SCALE)				\
  (__m128i) __builtin_ia32_gathersiv4si ((__v4si) _mm_setzero_si128 (),	\
					 (int const *)BASE,		\
					 (__v4si)(__m128i)INDEX,	\
					 (__v4si)_mm_set1_epi32 (-1),	\
					 (int)SCALE)

#define _mm_mask_i32gather_epi32(SRC, BASE, INDEX, MASK, SCALE) \
  (__m128i) __builtin_ia32_gathersiv4si ((__v4si)(__m128i)SRC,	\
					(int const *)BASE,	\
					(__v4si)(__m128i)INDEX, \
					(__v4si)(__m128i)MASK,	\
					(int)SCALE)

#define _mm256_i32gather_epi32(BASE, INDEX, SCALE)			   \
  (__m256i) __builtin_ia32_gathersiv8si ((__v8si) _mm256_setzero_si256 (), \
					 (int const *)BASE,		   \
					 (__v8si)(__m256i)INDEX,	   \
					 (__v8si)_mm256_set1_epi32 (-1),   \
					 (int)SCALE)

#define _mm256_mask_i32gather_epi32(SRC, BASE, INDEX, MASK, SCALE) \
  (__m256i) __builtin_ia32_gathersiv8si ((__v8si)(__m256i)SRC,	   \
					(int const *)BASE,	   \
					(__v8si)(__m256i)INDEX,	   \
					(__v8si)(__m256i)MASK,	   \
					(int)SCALE)

#define _mm_i64gather_epi32(BASE, INDEX, SCALE)				\
  (__m128i) __builtin_ia32_gatherdiv4si ((__v4si) _mm_setzero_si128 (),	\
					 (int const *)BASE,		\
					 (__v2di)(__m128i)INDEX,	\
					 (__v4si)_mm_set1_epi32 (-1),	\
					 (int)SCALE)

#define _mm_mask_i64gather_epi32(SRC, BASE, INDEX, MASK, SCALE) \
  (__m128i) __builtin_ia32_gatherdiv4si ((__v4si)(__m128i)SRC,	\
					(int const *)BASE,	\
					(__v2di)(__m128i)INDEX, \
					(__v4si)(__m128i)MASK,	\
					(int)SCALE)

#define _mm256_i64gather_epi32(BASE, INDEX, SCALE)			   \
  (__m128i) __builtin_ia32_gatherdiv4si256 ((__v4si) _mm_setzero_si128 (), \
					    (int const *)BASE,		   \
					    (__v4di)(__m256i)INDEX,	   \
					    (__v4si)_mm_set1_epi32(-1),	   \
					    (int)SCALE)

#define _mm256_mask_i64gather_epi32(SRC, BASE, INDEX, MASK, SCALE) \
  (__m128i) __builtin_ia32_gatherdiv4si256 ((__v4si)(__m128i)SRC,  \
					   (int const *)BASE,	   \
					   (__v4di)(__m256i)INDEX, \
					   (__v4si)(__m128i)MASK,  \
					   (int)SCALE)
#endif  /* __OPTIMIZE__ */

Youez - 2016 - github.com/yon3zu
LinuXploit