JFIFXX    $.' ",#(7),01444'9=82<.342  2!!22222222222222222222222222222222222222222222222222"4 ,PG"Z_4˷kjزZ,F+_z,© zh6٨icfu#ډb_N?wQ5-~I8TK<5oIv-k_U_~bMdӜUHh?]EwQk{_}qFW7HTՑYF?_'ϔ_Ջt=||I 6έ"D/[k9Y8ds|\Ҿp6Ҵ].6znopM[mei$[soᘨ˸ nɜG-ĨUycP3.DBli;hjx7Z^NhN3u{:jx힞#M&jL P@_ P&o89@Sz6t7#Oߋ s}YfTlmrZ)'Nk۞pw\Tȯ?8`Oi{wﭹW[r Q4F׊3m&L=h3z~#\l :F,j@ ʱwQT8"kJO6֚l}R>ډK]y&p}b;N1mr$|7>e@BTM*-iHgD) Em|ؘbҗaҾt4oG*oCNrPQ@z,|?W[0:n,jWiEW$~/hp\?{(0+Y8rΟ+>S-SVN;}s?. w9˟<Mq4Wv'{)01mBVW[8/< %wT^5b)iM pgN&ݝVO~qu9 !J27$O-! :%H ـyΠM=t{!S oK8txA& j0 vF Y|y ~6@c1vOpIg4lODL Rcj_uX63?nkWyf;^*B @~a`Eu+6L.ü>}y}_O6͐:YrGXkGl^w~㒶syIu! W XN7BVO!X2wvGRfT#t/?%8^WaTGcLMI(J1~8?aT ]ASE(*E} 2#I/׍qz^t̔bYz4xt){ OH+(EA&NXTo"XC')}Jzp ~5}^+6wcQ|LpdH}(.|kc4^"Z?ȕ a<L!039C EuCFEwç ;n?*oB8bʝ'#RqfM}7]s2tcS{\icTx;\7KPʇ Z O-~c>"?PEO8@8GQgaՎ󁶠䧘_%#r>1zaebqcPѵn#L =׀t L7`VA{C:ge@w1 Xp3c3ġpM"'-@n4fGB3DJ8[JoߐgK)ƛ$ 83+ 6ʻ SkI*KZlT _`?KQKdB`s}>`*>,*@JdoF*弝O}ks]yߘc1GV<=776qPTtXԀ!9*44Tހ3XΛex46YD  BdemDa\_l,G/֌7Y](xTt^%GE4}bTڹ;Y)BQu>J/J ⮶.XԄjݳ+Ed r5_D1 o Bx΢#<W8R6@gM. drD>(otU@x=~v2 ӣdoBd3eO6㣷ݜ66YQz`S{\P~z m5{J/L1xO\ZFu>ck#&:`$ai>2ΔloF[hlEܺΠk:)` $[69kOw\|8}ބ:񶐕IA1/=2[,!.}gN#ub ~݊}34qdELc$"[qU硬g^%B zrpJru%v\h1Yne`ǥ:gpQM~^Xi `S:V29.PV?Bk AEvw%_9CQwKekPؠ\;Io d{ ߞoc1eP\ `E=@KIRYK2NPlLɀ)&eB+ь( JTx_?EZ }@ 6U뙢طzdWIn` D噥[uV"G&Ú2g}&m?ċ"Om# {ON"SXNeysQ@FnVgdX~nj]J58up~.`r\O,ư0oS _Ml4kv\JSdxSW<AeIX$Iw:Sy›R9Q[,5;@]%u@ *rolbI  +%m:͇ZVủθau,RW33 dJeTYE.Mϧ-oj3+yy^cVO9NV\nd1 !͕_)av;թMlWR1)ElP;yوÏu 3k5Pr6<⒲l!˞*u־n!l:UNW %Chx8vL'X@*)̮ˍ D-M+JUkvK+x8cY?Ԡ~3mo|u@[XeYC\Kpx8oCC&N~3-H MXsu<`~"WL$8ξ3a)|:@m\^`@ҷ)5p+6p%i)P Mngc#0AruzRL+xSS?ʮ}()#tmˇ!0}}y$6Lt;$ʳ{^6{v6ķܰgVcnn ~zx«,2u?cE+ȘH؎%Za)X>uWTzNyosFQƤ$*&LLXL)1" LeOɟ9=:tZcŽY?ӭVwv~,Yrۗ|yGaFC.+ v1fήJ]STBn5sW}y$~z'c 8  ,! pVNSNNqy8z˱A4*'2n<s^ǧ˭PJޮɏUGLJ*#i}K%,)[z21z ?Nin1?TIR#m-1lA`fT5+ܐcq՝ʐ,3f2Uեmab#ŠdQy>\)SLYw#.ʑf ,"+w~N'cO3FN<)j&,- љ֊_zSTǦw>?nU仆Ve0$CdrP m׈eXmVu L.bֹ [Դaզ*\y8Է:Ez\0KqC b̘cөQ=0YsNS.3.Oo:#v7[#߫ 5܎LEr49nCOWlG^0k%;YߝZǓ:S#|}y,/kLd TA(AI$+I3;Y*Z}|ӧOdv..#:nf>>ȶITX 8y"dR|)0=n46ⲑ+ra ~]R̲c?6(q;5% |uj~z8R=XIV=|{vGj\gcqz؋%Mߍ1y#@f^^>N#x#۹6Y~?dfPO{P4Vu1E1J *|%JN`eWuzk M6q t[ gGvWIGu_ft5j"Y:Tɐ*; e54q$C2d} _SL#mYpO.C;cHi#֩%+) ӍƲVSYźg |tj38r|V1#;.SQA[S#`n+$$I P\[@s(EDzP])8G#0B[ىXIIq<9~[Z멜Z⊔IWU&A>P~#dp]9 "cP Md?٥Ifتuk/F9c*9Ǎ:ØFzn*@|Iށ9N3{'['ͬҲ4#}!V Fu,,mTIkv C7vB6kT91*l '~ƞFlU'M ][ΩũJ_{iIn$L jOdxkza۪#EClx˘oVɞljr)/,߬hL#^Lф,íMƁe̩NBLiLq}(q6IçJ$WE$:=#(KBzђ xlx?>Պ+>W,Ly!_DŌlQ![ SJ1ƐY}b,+Loxɓ)=yoh@꥟/Iѭ=Py9 ۍYӘe+pJnϱ?V\SO%(t =?MR[Șd/ nlB7j !;ӥ/[-A>dNsLj ,ɪv=1c.SQO3UƀܽE̻9GϷD7(}Ävӌ\y_0[w <΍>a_[0+LF.޺f>oNTq;y\bՃyjH<|q-eɏ_?_9+PHp$[uxK wMwNی'$Y2=qKBP~Yul:[<F12O5=d]Ysw:ϮEj,_QXz`H1,#II dwrP˂@ZJVy$\y{}^~[:NߌUOdؾe${p>G3cĖlʌ ת[`ϱ-WdgIig2 }s ؤ(%#sS@~3XnRG~\jc3vӍLM[JBTs3}jNʖW;7ç?=XF=-=qߚ#='c7ڑWI(O+=:uxqe2zi+kuGR0&eniT^J~\jyp'dtGsO39* b#Ɋ p[BwsT>d4ۧsnvnU_~,vƜJ1s QIz)(lv8MU=;56Gs#KMP=LvyGd}VwWBF'à ?MHUg2 !p7Qjڴ=ju JnA suMeƆҔ!)'8Ϣٔޝ(Vpצ֖d=ICJǠ{qkԭ߸i@Ku|p=..*+xz[Aqġ#s2aƊRR)*HRsi~a &fMP-KL@ZXy'x{}Zm+:)) IJ-iu ܒH'L(7yGӜq j 6ߌg1go,kرtY?W,pefOQS!K۟cҒA|սj>=⬒˧L[ ߿2JaB~Ru:Q] 0H~]7ƼI(}cq 'ήETq?fabӥvr )o-Q_'ᴎoK;Vo%~OK *bf:-ťIR`B5!RB@ï u ̯e\_U_ gES3QTaxU<~c?*#]MW,[8Oax]1bC|踤Plw5V%){t<d50iXSUm:Z┵i"1^B-PhJ&)O*DcWvM)}Pܗ-q\mmζZ-l@}aE6F@&Sg@ݚM ȹ 4#p\HdYDoH"\..RBHz_/5˘6KhJRPmƶim3,#ccoqa)*PtRmk7xDE\Y閣_X<~)c[[BP6YqS0%_;Àv~| VS؇ 'O0F0\U-d@7SJ*z3nyPOm~P3|Yʉr#CSN@ ƮRN)r"C:: #qbY. 6[2K2uǦHYRQMV G$Q+.>nNHq^ qmMVD+-#*U̒ p욳u:IBmPV@Or[b= 1UE_NmyKbNOU}the`|6֮P>\2PVIDiPO;9rmAHGWS]J*_G+kP2KaZH'KxWMZ%OYDRc+o?qGhmdSoh\D|:WUAQc yTq~^H/#pCZTI1ӏT4"ČZ}`w#*,ʹ 0i課Om*da^gJ݅{le9uF#Tֲ̲ٞC"qߍ ոޑo#XZTp@ o8(jdxw],f`~|,s^f1t|m򸄭/ctr5s79Q4H1꠲BB@l9@C+wpxu£Yc9?`@#omHs2)=2.ljg9$YS%*LRY7Z,*=䷘$armoϰUW.|rufIGwtZwo~5 YյhO+=8fF)W7L9lM̘·Y֘YLf큹pRF99.A "wz=E\Z'a 2Ǚ#;'}G*l^"q+2FQ hjkŦ${ޮ-T٭cf|3#~RJt$b(R(rdx >U b&9,>%E\ Άe$'q't*אެb-|dSBOO$R+H)܎K1m`;J2Y~9Og8=vqD`K[F)k[1m޼cn]skz$@)!I x՝"v9=ZA=`Ɠi :E)`7vI}dYI_ o:obo 3Q&D&2= Ά;>hy.*ⅥSӬ+q&j|UƧ}J0WW< ۋS)jQRjƯrN)Gű4Ѷ(S)Ǣ8iW52No˓ ۍ%5brOnL;n\G=^UdI8$&h'+(cȁ߫klS^cƗjԌEꭔgFȒ@}O*;evWVYJ\]X'5ղkFb 6Ro՜mi Ni>J?lPmU}>_Z&KKqrIDՉ~q3fL:Se>E-G{L6pe,8QIhaXaUA'ʂs+טIjP-y8ۈZ?J$WP Rs]|l(ԓsƊio(S0Y 8T97.WiLc~dxcE|2!XKƘਫ਼$((6~|d9u+qd^389Y6L.I?iIq9)O/뚅OXXVZF[یgQLK1RҖr@v#XlFНyS87kF!AsM^rkpjPDyS$Nqnxҍ!Uf!ehi2m`YI9r6 TFC}/y^Η5d'9A-J>{_l+`A['յϛ#w:݅%X}&PStQ"-\縵/$ƗhXb*yBS;Wջ_mcvt?2}1;qSdd~u:2k52R~z+|HE!)Ǟl7`0<,2*Hl-x^'_TVgZA'j ^2ΪN7t?w x1fIzC-ȖK^q;-WDvT78Z hK(P:Q- 8nZ܃e貾<1YT<,"6{/ ?͟|1:#gW>$dJdB=jf[%rE^il:BxSּ1հ,=*7 fcG#q eh?27,!7x6nLC4x},GeǝtC.vS F43zz\;QYC,6~;RYS/6|25vTimlv& nRh^ejRLGf? ۉҬܦƩ|Ȱ>3!viʯ>vオX3e_1zKȗ\qHS,EW[㺨uch⍸O}a>q6n6N6qN ! 1AQaq0@"2BRb#Pr3C`Scst$4D%Td ?Na3mCwxAmqmm$4n淿t'C"wzU=D\R+wp+YT&պ@ƃ3ޯ?AﶂaŘ@-Q=9Dռѻ@MVP܅G5fY6# ?0UQ,IX(6ڵ[DIMNލc&υj\XR|,4 jThAe^db#$]wOӪ1y%LYm뭛CUƃߜ}Cy1XνmF8jI]HۺиE@Ii;r8ӭVFՇ| &?3|xBMuSGe=Ӕ#BE5GY!z_eqр/W>|-Ci߇t1ޯќdR3ug=0 5[?#͏qcfH{ ?u=??ǯ}ZzhmΔBFTWPxs}G93 )gGR<>r h$'nchPBjJҧH -N1N?~}-q!=_2hcMlvY%UE@|vM2.Y[|y"EïKZF,ɯ?,q?vM 80jx";9vk+ ֧ ȺU?%vcVmA6Qg^MA}3nl QRNl8kkn'(M7m9وq%ޟ*h$Zk"$9: ?U8Sl,,|ɒxH(ѷGn/Q4PG%Ա8N! &7;eKM749R/%lc>x;>C:th?aKXbheᜋ^$Iհ hr7%F$EFdt5+(M6tÜUU|zW=aTsTgdqPQb'm1{|YXNb P~F^F:k6"j! Ir`1&-$Bevk:y#ywI0x=D4tUPZHڠ底taP6b>xaQ# WeFŮNjpJ* mQN*I-*ȩFg3 5Vʊɮa5FO@{NX?H]31Ri_uѕ 0 F~:60p͈SqX#a5>`o&+<2D: ڝ$nP*)N|yEjF5ټeihyZ >kbHavh-#!Po=@k̆IEN@}Ll?jO߭ʞQ|A07xwt!xfI2?Z<ץTcUj]陎Ltl }5ϓ$,Omˊ;@OjEj(ا,LXLOЦ90O .anA7j4 W_ٓzWjcBy՗+EM)dNg6y1_xp$Lv:9"zpʙ$^JԼ*ϭo=xLj6Ju82AH3$ٕ@=Vv]'qEz;I˼)=ɯx /W(Vp$ mu񶤑OqˎTr㠚xsrGCbypG1ߠw e8$⿄/M{*}W]˷.CK\ުx/$WPwr |i&}{X >$-l?-zglΆ(FhvS*b߲ڡn,|)mrH[a3ר[13o_U3TC$(=)0kgP u^=4 WYCҸ:vQרXàtkm,t*^,}D* "(I9R>``[~Q]#afi6l86:,ssN6j"A4IuQ6E,GnHzSHOuk5$I4ؤQ9@CwpBGv[]uOv0I4\yQѸ~>Z8Taqޣ;za/SI:ܫ_|>=Z8:SUIJ"IY8%b8H:QO6;7ISJҌAά3>cE+&jf$eC+z;V rʺmyeaQf&6ND.:NTvm<- uǝ\MvZYNNT-A>jr!SnO 13Ns%3D@`ܟ 1^c< aɽ̲Xë#w|ycW=9I*H8p^(4՗karOcWtO\ƍR8'KIQ?5>[}yUײ -h=% qThG2)"ו3]!kB*pFDlA,eEiHfPs5H:Փ~H0DتDIhF3c2E9H5zԑʚiX=:mxghd(v׊9iSOd@0ڽ:p5h-t&Xqӕ,ie|7A2O%PEhtjY1wЃ!  ࢽMy7\a@ţJ 4ȻF@o̒?4wx)]P~u57X 9^ܩU;Iꭆ 5 eK27({|Y׎ V\"Z1 Z}(Ǝ"1S_vE30>p; ΝD%xW?W?vo^Vidr[/&>~`9Why;R ;;ɮT?r$g1KACcKl:'3 cﳯ*"t8~l)m+U,z`(>yJ?h>]vЍG*{`;y]IT ;cNUfo¾h/$|NS1S"HVT4uhǜ]v;5͠x'C\SBplh}N ABx%ޭl/Twʽ]D=Kžr㻠l4SO?=k M: cCa#ha)ѐxcsgPiG{+xQI= zԫ+ 8"kñj=|c yCF/*9жh{ ?4o kmQNx;Y4膚aw?6>e]Qr:g,i"ԩA*M7qB?ӕFhV25r[7 Y }LR}*sg+xr2U=*'WSZDW]WǞ<叓{$9Ou4y90-1'*D`c^o?(9uݐ'PI& fJݮ:wSjfP1F:X H9dԯ˝[_54 }*;@ܨ ðynT?ןd#4rGͨH1|-#MrS3G3).᧏3vz֑r$G"`j 1tx0<ƆWh6y6,œGagAyb)hDß_mü gG;evݝnQ C-*oyaMI><]obD":GA-\%LT8c)+y76oQ#*{(F⽕y=rW\p۩cA^e6KʐcVf5$'->ՉN"F"UQ@fGb~#&M=8טJNu9D[̤so~ G9TtW^g5y$bY'سǴ=U-2 #MCt(i lj@Q 5̣i*OsxKf}\M{EV{υƇ);HIfeLȣr2>WIȂ6ik 5YOxȺ>Yf5'|H+98pjn.OyjY~iw'l;s2Y:'lgꥴ)o#'SaaKZ m}`169n"xI *+ }FP"l45'ZgE8?[X7(.Q-*ތL@̲v.5[=t\+CNܛ,gSQnH}*FG16&:t4ُ"Ạ$b |#rsaT ]ӽDP7ո0y)e$ٕvIh'QEAm*HRI=: 4牢) %_iNݧl] NtGHL ɱg<1V,J~ٹ"KQ 9HS9?@kr;we݁]I!{ @G["`J:n]{cAEVʆ#U96j#Ym\qe4hB7Cdv\MNgmAyQL4uLjj9#44tl^}LnR!t±]rh6ٍ>yҏNfU  Fm@8}/ujb9he:AyծwGpΧh5l}3p468)Udc;Us/֔YX1O2uqs`hwgr~{ RmhN؎*q 42*th>#E#HvOq}6e\,Wk#Xb>p}դ3T5†6[@Py*n|'f֧>lư΂̺SU'*qp_SM 'c6m ySʨ;MrƋmKxo,GmPAG:iw9}M(^V$ǒѽ9| aJSQarB;}ٻ֢2%Uc#gNaݕ'v[OY'3L3;,p]@S{lsX'cjwk'a.}}& dP*bK=ɍ!;3ngΊUߴmt'*{,=SzfD Ako~Gaoq_mi}#mPXhύmxǍ΂巿zfQc|kc?WY$_Lvl߶c`?ljݲˏ!V6UЂ(A4y)HpZ_x>eR$/`^'3qˏ-&Q=?CFVR DfV9{8gnh(P"6[D< E~0<@`G6Hгcc cK.5DdB`?XQ2ٿyqo&+1^ DW0ꊩG#QnL3c/x 11[yxპCWCcUĨ80me4.{muI=f0QRls9f9~fǨa"@8ȁQ#cicG$Gr/$W(WV"m7[mAmboD j۳ l^kh׽ # iXnveTka^Y4BNĕ0 !01@Q"2AaPq3BR?@4QT3,㺠W[=JKϞ2r^7vc:9 EߴwS#dIxu:Hp9E! V 2;73|F9Y*ʬFDu&y؟^EAA(ɩ^GV:ݜDy`Jr29ܾ㝉[E;FzxYGUeYC v-txIsםĘqEb+P\ :>iC';k|zرny]#ǿbQw(r|ӹs[D2v-%@;8<a[\o[ϧwI!*0krs)[J9^ʜp1) "/_>o<1AEy^C`x1'ܣnps`lfQ):lb>MejH^?kl3(z:1ŠK&?Q~{ٺhy/[V|6}KbXmn[-75q94dmc^h X5G-}دBޟ |rtMV+]c?-#ڛ^ǂ}LkrOu>-Dry D?:ޞUǜ7V?瓮"#rչģVR;n/_ ؉vݶe5db9/O009G5nWJpA*r9>1.[tsFnQ V 77R]ɫ8_0<՜IFu(v4Fk3E)N:yڮeP`1}$WSJSQNjٺ޵#lј(5=5lǏmoWv-1v,Wmn߀$x_DȬ0¤#QR[Vkzmw"9ZG7'[=Qj8R?zf\a=OU*oBA|G254 p.w7  &ξxGHp B%$gtЏ򤵍zHNuЯ-'40;_3 !01"@AQa2Pq#3BR?ʩcaen^8F<7;EA{EÖ1U/#d1an.1ě0ʾRh|RAo3m3 % 28Q yφHTo7lW>#i`qca m,B-j݋'mR1Ήt>Vps0IbIC.1Rea]H64B>o]($Bma!=?B KǾ+Ծ"nK*+[T#{EJSQs5:U\wĐf3܆&)IԆwE TlrTf6Q|Rh:[K zc֧GC%\_a84HcObiؖV7H )*ģK~Xhչ04?0 E<}3#u? |gS6ꊤ|I#Hڛ աwX97Ŀ%SLy6č|Fa 8b$sקhb9RAu7˨pČ_\*w묦F 4D~f|("mNKiS>$d7SlA/²SL|6N}S˯g]6; #. 403WebShell
403Webshell
Server IP : 173.199.190.172  /  Your IP : 216.73.216.167
Web Server : Apache
System : Linux chs1.nescrow.com.ng 3.10.0-1160.119.1.el7.x86_64 #1 SMP Tue Jun 4 14:43:51 UTC 2024 x86_64
User : oysipaoygov ( 1026)
PHP Version : 5.6.40
Disable Function : exec,passthru,shell_exec,system
MySQL : ON  |  cURL : ON  |  WGET : ON  |  Perl : ON  |  Python : ON  |  Sudo : ON  |  Pkexec : ON
Directory :  /lib/gcc/x86_64-redhat-linux/4.8.2/include/

Upload File :
current_dir [ Writeable ] document_root [ Writeable ]

 

Command :


[ Back ]     

Current File : /lib/gcc/x86_64-redhat-linux/4.8.2/include/avxintrin.h
/* Copyright (C) 2008-2013 Free Software Foundation, Inc.

   This file is part of GCC.

   GCC is free software; you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
   the Free Software Foundation; either version 3, or (at your option)
   any later version.

   GCC is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   GNU General Public License for more details.

   Under Section 7 of GPL version 3, you are granted additional
   permissions described in the GCC Runtime Library Exception, version
   3.1, as published by the Free Software Foundation.

   You should have received a copy of the GNU General Public License and
   a copy of the GCC Runtime Library Exception along with this program;
   see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
   <http://www.gnu.org/licenses/>.  */

/* Implemented from the specification included in the Intel C++ Compiler
   User Guide and Reference, version 11.0.  */

#ifndef _IMMINTRIN_H_INCLUDED
# error "Never use <avxintrin.h> directly; include <immintrin.h> instead."
#endif

/* Internal data types for implementing the intrinsics.  */
typedef double __v4df __attribute__ ((__vector_size__ (32)));
typedef float __v8sf __attribute__ ((__vector_size__ (32)));
typedef long long __v4di __attribute__ ((__vector_size__ (32)));
typedef int __v8si __attribute__ ((__vector_size__ (32)));
typedef short __v16hi __attribute__ ((__vector_size__ (32)));
typedef char __v32qi __attribute__ ((__vector_size__ (32)));

/* The Intel API is flexible enough that we must allow aliasing with other
   vector types, and their scalar components.  */
typedef float __m256 __attribute__ ((__vector_size__ (32),
				     __may_alias__));
typedef long long __m256i __attribute__ ((__vector_size__ (32),
					  __may_alias__));
typedef double __m256d __attribute__ ((__vector_size__ (32),
				       __may_alias__));

/* Compare predicates for scalar and packed compare intrinsics.  */

/* Equal (ordered, non-signaling)  */
#define _CMP_EQ_OQ	0x00
/* Less-than (ordered, signaling)  */
#define _CMP_LT_OS	0x01
/* Less-than-or-equal (ordered, signaling)  */
#define _CMP_LE_OS	0x02
/* Unordered (non-signaling)  */
#define _CMP_UNORD_Q	0x03
/* Not-equal (unordered, non-signaling)  */
#define _CMP_NEQ_UQ	0x04
/* Not-less-than (unordered, signaling)  */
#define _CMP_NLT_US	0x05
/* Not-less-than-or-equal (unordered, signaling)  */
#define _CMP_NLE_US	0x06
/* Ordered (nonsignaling)   */
#define _CMP_ORD_Q	0x07
/* Equal (unordered, non-signaling)  */
#define _CMP_EQ_UQ	0x08
/* Not-greater-than-or-equal (unordered, signaling)  */
#define _CMP_NGE_US	0x09
/* Not-greater-than (unordered, signaling)  */
#define _CMP_NGT_US	0x0a
/* False (ordered, non-signaling)  */
#define _CMP_FALSE_OQ	0x0b
/* Not-equal (ordered, non-signaling)  */
#define _CMP_NEQ_OQ	0x0c
/* Greater-than-or-equal (ordered, signaling)  */
#define _CMP_GE_OS	0x0d
/* Greater-than (ordered, signaling)  */
#define _CMP_GT_OS	0x0e
/* True (unordered, non-signaling)  */
#define _CMP_TRUE_UQ	0x0f
/* Equal (ordered, signaling)  */
#define _CMP_EQ_OS	0x10
/* Less-than (ordered, non-signaling)  */
#define _CMP_LT_OQ	0x11
/* Less-than-or-equal (ordered, non-signaling)  */
#define _CMP_LE_OQ	0x12
/* Unordered (signaling)  */
#define _CMP_UNORD_S	0x13
/* Not-equal (unordered, signaling)  */
#define _CMP_NEQ_US	0x14
/* Not-less-than (unordered, non-signaling)  */
#define _CMP_NLT_UQ	0x15
/* Not-less-than-or-equal (unordered, non-signaling)  */
#define _CMP_NLE_UQ	0x16
/* Ordered (signaling)  */
#define _CMP_ORD_S	0x17
/* Equal (unordered, signaling)  */
#define _CMP_EQ_US	0x18
/* Not-greater-than-or-equal (unordered, non-signaling)  */
#define _CMP_NGE_UQ	0x19
/* Not-greater-than (unordered, non-signaling)  */
#define _CMP_NGT_UQ	0x1a
/* False (ordered, signaling)  */
#define _CMP_FALSE_OS	0x1b
/* Not-equal (ordered, signaling)  */
#define _CMP_NEQ_OS	0x1c
/* Greater-than-or-equal (ordered, non-signaling)  */
#define _CMP_GE_OQ	0x1d
/* Greater-than (ordered, non-signaling)  */
#define _CMP_GT_OQ	0x1e
/* True (unordered, signaling)  */
#define _CMP_TRUE_US	0x1f

extern __inline __m256d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_add_pd (__m256d __A, __m256d __B)
{
  return (__m256d) __builtin_ia32_addpd256 ((__v4df)__A, (__v4df)__B);
}

extern __inline __m256 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_add_ps (__m256 __A, __m256 __B)
{
  return (__m256) __builtin_ia32_addps256 ((__v8sf)__A, (__v8sf)__B);
}

extern __inline __m256d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_addsub_pd (__m256d __A, __m256d __B)
{
  return (__m256d) __builtin_ia32_addsubpd256 ((__v4df)__A, (__v4df)__B);
}

extern __inline __m256 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_addsub_ps (__m256 __A, __m256 __B)
{
  return (__m256) __builtin_ia32_addsubps256 ((__v8sf)__A, (__v8sf)__B);
}


extern __inline __m256d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_and_pd (__m256d __A, __m256d __B)
{
  return (__m256d) __builtin_ia32_andpd256 ((__v4df)__A, (__v4df)__B);
}

extern __inline __m256 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_and_ps (__m256 __A, __m256 __B)
{
  return (__m256) __builtin_ia32_andps256 ((__v8sf)__A, (__v8sf)__B);
}

extern __inline __m256d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_andnot_pd (__m256d __A, __m256d __B)
{
  return (__m256d) __builtin_ia32_andnpd256 ((__v4df)__A, (__v4df)__B);
}

extern __inline __m256 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_andnot_ps (__m256 __A, __m256 __B)
{
  return (__m256) __builtin_ia32_andnps256 ((__v8sf)__A, (__v8sf)__B);
}

/* Double/single precision floating point blend instructions - select
   data from 2 sources using constant/variable mask.  */

#ifdef __OPTIMIZE__
extern __inline __m256d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_blend_pd (__m256d __X, __m256d __Y, const int __M)
{
  return (__m256d) __builtin_ia32_blendpd256 ((__v4df)__X,
					      (__v4df)__Y,
					      __M);
}

extern __inline __m256 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_blend_ps (__m256 __X, __m256 __Y, const int __M)
{
  return (__m256) __builtin_ia32_blendps256 ((__v8sf)__X,
					     (__v8sf)__Y,
					     __M);
}
#else
#define _mm256_blend_pd(X, Y, M)					\
  ((__m256d) __builtin_ia32_blendpd256 ((__v4df)(__m256d)(X),		\
					(__v4df)(__m256d)(Y), (int)(M)))

#define _mm256_blend_ps(X, Y, M)					\
  ((__m256) __builtin_ia32_blendps256 ((__v8sf)(__m256)(X),		\
				       (__v8sf)(__m256)(Y), (int)(M)))
#endif

extern __inline __m256d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_blendv_pd (__m256d __X, __m256d __Y, __m256d __M)
{
  return (__m256d) __builtin_ia32_blendvpd256 ((__v4df)__X,
					       (__v4df)__Y,
					       (__v4df)__M);
}

extern __inline __m256 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_blendv_ps (__m256 __X, __m256 __Y, __m256 __M)
{
  return (__m256) __builtin_ia32_blendvps256 ((__v8sf)__X,
					      (__v8sf)__Y,
					      (__v8sf)__M);
}

extern __inline __m256d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_div_pd (__m256d __A, __m256d __B)
{
  return (__m256d) __builtin_ia32_divpd256 ((__v4df)__A, (__v4df)__B);
}

extern __inline __m256 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_div_ps (__m256 __A, __m256 __B)
{
  return (__m256) __builtin_ia32_divps256 ((__v8sf)__A, (__v8sf)__B);
}

/* Dot product instructions with mask-defined summing and zeroing parts
   of result.  */

#ifdef __OPTIMIZE__
extern __inline __m256 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_dp_ps (__m256 __X, __m256 __Y, const int __M)
{
  return (__m256) __builtin_ia32_dpps256 ((__v8sf)__X,
					  (__v8sf)__Y,
					  __M);
}
#else
#define _mm256_dp_ps(X, Y, M)						\
  ((__m256) __builtin_ia32_dpps256 ((__v8sf)(__m256)(X),		\
				    (__v8sf)(__m256)(Y), (int)(M)))
#endif

extern __inline __m256d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_hadd_pd (__m256d __X, __m256d __Y)
{
  return (__m256d) __builtin_ia32_haddpd256 ((__v4df)__X, (__v4df)__Y);
}

extern __inline __m256 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_hadd_ps (__m256 __X, __m256 __Y)
{
  return (__m256) __builtin_ia32_haddps256 ((__v8sf)__X, (__v8sf)__Y);
}

extern __inline __m256d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_hsub_pd (__m256d __X, __m256d __Y)
{
  return (__m256d) __builtin_ia32_hsubpd256 ((__v4df)__X, (__v4df)__Y);
}

extern __inline __m256 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_hsub_ps (__m256 __X, __m256 __Y)
{
  return (__m256) __builtin_ia32_hsubps256 ((__v8sf)__X, (__v8sf)__Y);
}

extern __inline __m256d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_max_pd (__m256d __A, __m256d __B)
{
  return (__m256d) __builtin_ia32_maxpd256 ((__v4df)__A, (__v4df)__B);
}

extern __inline __m256 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_max_ps (__m256 __A, __m256 __B)
{
  return (__m256) __builtin_ia32_maxps256 ((__v8sf)__A, (__v8sf)__B);
}

extern __inline __m256d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_min_pd (__m256d __A, __m256d __B)
{
  return (__m256d) __builtin_ia32_minpd256 ((__v4df)__A, (__v4df)__B);
}

extern __inline __m256 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_min_ps (__m256 __A, __m256 __B)
{
  return (__m256) __builtin_ia32_minps256 ((__v8sf)__A, (__v8sf)__B);
}

extern __inline __m256d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_mul_pd (__m256d __A, __m256d __B)
{
  return (__m256d) __builtin_ia32_mulpd256 ((__v4df)__A, (__v4df)__B);
}

extern __inline __m256 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_mul_ps (__m256 __A, __m256 __B)
{
  return (__m256) __builtin_ia32_mulps256 ((__v8sf)__A, (__v8sf)__B);
}

extern __inline __m256d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_or_pd (__m256d __A, __m256d __B)
{
  return (__m256d) __builtin_ia32_orpd256 ((__v4df)__A, (__v4df)__B);
}

extern __inline __m256 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_or_ps (__m256 __A, __m256 __B)
{
  return (__m256) __builtin_ia32_orps256 ((__v8sf)__A, (__v8sf)__B);
}

#ifdef __OPTIMIZE__
extern __inline __m256d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_shuffle_pd (__m256d __A, __m256d __B, const int __mask)
{
  return (__m256d) __builtin_ia32_shufpd256 ((__v4df)__A, (__v4df)__B,
					     __mask);
}

extern __inline __m256 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_shuffle_ps (__m256 __A, __m256 __B, const int __mask)
{
  return (__m256) __builtin_ia32_shufps256 ((__v8sf)__A, (__v8sf)__B,
					    __mask);
}
#else
#define _mm256_shuffle_pd(A, B, N)					\
  ((__m256d)__builtin_ia32_shufpd256 ((__v4df)(__m256d)(A),		\
				      (__v4df)(__m256d)(B), (int)(N)))

#define _mm256_shuffle_ps(A, B, N)					\
  ((__m256) __builtin_ia32_shufps256 ((__v8sf)(__m256)(A),		\
				      (__v8sf)(__m256)(B), (int)(N)))
#endif

extern __inline __m256d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_sub_pd (__m256d __A, __m256d __B)
{
  return (__m256d) __builtin_ia32_subpd256 ((__v4df)__A, (__v4df)__B);
}

extern __inline __m256 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_sub_ps (__m256 __A, __m256 __B)
{
  return (__m256) __builtin_ia32_subps256 ((__v8sf)__A, (__v8sf)__B);
}

extern __inline __m256d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_xor_pd (__m256d __A, __m256d __B)
{
  return (__m256d) __builtin_ia32_xorpd256 ((__v4df)__A, (__v4df)__B);
}

extern __inline __m256 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_xor_ps (__m256 __A, __m256 __B)
{
  return (__m256) __builtin_ia32_xorps256 ((__v8sf)__A, (__v8sf)__B);
}

#ifdef __OPTIMIZE__
extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_cmp_pd (__m128d __X, __m128d __Y, const int __P)
{
  return (__m128d) __builtin_ia32_cmppd ((__v2df)__X, (__v2df)__Y, __P);
}

extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_cmp_ps (__m128 __X, __m128 __Y, const int __P)
{
  return (__m128) __builtin_ia32_cmpps ((__v4sf)__X, (__v4sf)__Y, __P);
}

extern __inline __m256d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_cmp_pd (__m256d __X, __m256d __Y, const int __P)
{
  return (__m256d) __builtin_ia32_cmppd256 ((__v4df)__X, (__v4df)__Y,
					    __P);
}

extern __inline __m256 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_cmp_ps (__m256 __X, __m256 __Y, const int __P)
{
  return (__m256) __builtin_ia32_cmpps256 ((__v8sf)__X, (__v8sf)__Y,
					   __P);
}

extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_cmp_sd (__m128d __X, __m128d __Y, const int __P)
{
  return (__m128d) __builtin_ia32_cmpsd ((__v2df)__X, (__v2df)__Y, __P);
}

extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_cmp_ss (__m128 __X, __m128 __Y, const int __P)
{
  return (__m128) __builtin_ia32_cmpss ((__v4sf)__X, (__v4sf)__Y, __P);
}
#else
#define _mm_cmp_pd(X, Y, P)						\
  ((__m128d) __builtin_ia32_cmppd ((__v2df)(__m128d)(X),		\
				   (__v2df)(__m128d)(Y), (int)(P)))

#define _mm_cmp_ps(X, Y, P)						\
  ((__m128) __builtin_ia32_cmpps ((__v4sf)(__m128)(X),			\
				  (__v4sf)(__m128)(Y), (int)(P)))

#define _mm256_cmp_pd(X, Y, P)						\
  ((__m256d) __builtin_ia32_cmppd256 ((__v4df)(__m256d)(X),		\
				      (__v4df)(__m256d)(Y), (int)(P)))

#define _mm256_cmp_ps(X, Y, P)						\
  ((__m256) __builtin_ia32_cmpps256 ((__v8sf)(__m256)(X),		\
				     (__v8sf)(__m256)(Y), (int)(P)))

#define _mm_cmp_sd(X, Y, P)						\
  ((__m128d) __builtin_ia32_cmpsd ((__v2df)(__m128d)(X),		\
				   (__v2df)(__m128d)(Y), (int)(P)))

#define _mm_cmp_ss(X, Y, P)						\
  ((__m128) __builtin_ia32_cmpss ((__v4sf)(__m128)(X),			\
				  (__v4sf)(__m128)(Y), (int)(P)))
#endif

extern __inline __m256d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_cvtepi32_pd (__m128i __A)
{
  return (__m256d)__builtin_ia32_cvtdq2pd256 ((__v4si) __A);
}

extern __inline __m256 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_cvtepi32_ps (__m256i __A)
{
  return (__m256)__builtin_ia32_cvtdq2ps256 ((__v8si) __A);
}

extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_cvtpd_ps (__m256d __A)
{
  return (__m128)__builtin_ia32_cvtpd2ps256 ((__v4df) __A);
}

extern __inline __m256i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_cvtps_epi32 (__m256 __A)
{
  return (__m256i)__builtin_ia32_cvtps2dq256 ((__v8sf) __A);
}

extern __inline __m256d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_cvtps_pd (__m128 __A)
{
  return (__m256d)__builtin_ia32_cvtps2pd256 ((__v4sf) __A);
}

extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_cvttpd_epi32 (__m256d __A)
{
  return (__m128i)__builtin_ia32_cvttpd2dq256 ((__v4df) __A);
}

extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_cvtpd_epi32 (__m256d __A)
{
  return (__m128i)__builtin_ia32_cvtpd2dq256 ((__v4df) __A);
}

extern __inline __m256i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_cvttps_epi32 (__m256 __A)
{
  return (__m256i)__builtin_ia32_cvttps2dq256 ((__v8sf) __A);
}

#ifdef __OPTIMIZE__
extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_extractf128_pd (__m256d __X, const int __N)
{
  return (__m128d) __builtin_ia32_vextractf128_pd256 ((__v4df)__X, __N);
}

extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_extractf128_ps (__m256 __X, const int __N)
{
  return (__m128) __builtin_ia32_vextractf128_ps256 ((__v8sf)__X, __N);
}

extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_extractf128_si256 (__m256i __X, const int __N)
{
  return (__m128i) __builtin_ia32_vextractf128_si256 ((__v8si)__X, __N);
}

extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_extract_epi32 (__m256i __X, int const __N)
{
  __m128i __Y = _mm256_extractf128_si256 (__X, __N >> 2);
  return _mm_extract_epi32 (__Y, __N % 4);
}

extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_extract_epi16 (__m256i __X, int const __N)
{
  __m128i __Y = _mm256_extractf128_si256 (__X, __N >> 3);
  return _mm_extract_epi16 (__Y, __N % 8);
}

extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_extract_epi8 (__m256i __X, int const __N)
{
  __m128i __Y = _mm256_extractf128_si256 (__X, __N >> 4);
  return _mm_extract_epi8 (__Y, __N % 16);
}

#ifdef __x86_64__
extern __inline long long  __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_extract_epi64 (__m256i __X, const int __N)
{
  __m128i __Y = _mm256_extractf128_si256 (__X, __N >> 1);
  return _mm_extract_epi64 (__Y, __N % 2);
}
#endif
#else
#define _mm256_extractf128_pd(X, N)					\
  ((__m128d) __builtin_ia32_vextractf128_pd256 ((__v4df)(__m256d)(X),	\
						(int)(N)))

#define _mm256_extractf128_ps(X, N)					\
  ((__m128) __builtin_ia32_vextractf128_ps256 ((__v8sf)(__m256)(X),	\
					       (int)(N)))

#define _mm256_extractf128_si256(X, N)					\
  ((__m128i) __builtin_ia32_vextractf128_si256 ((__v8si)(__m256i)(X),	\
						(int)(N)))

#define _mm256_extract_epi32(X, N)					\
  (__extension__							\
   ({									\
      __m128i __Y = _mm256_extractf128_si256 ((X), (N) >> 2);		\
      _mm_extract_epi32 (__Y, (N) % 4);					\
    }))

#define _mm256_extract_epi16(X, N)					\
  (__extension__							\
   ({									\
      __m128i __Y = _mm256_extractf128_si256 ((X), (N) >> 3);		\
      _mm_extract_epi16 (__Y, (N) % 8);					\
    }))

#define _mm256_extract_epi8(X, N)					\
  (__extension__							\
   ({									\
      __m128i __Y = _mm256_extractf128_si256 ((X), (N) >> 4);		\
      _mm_extract_epi8 (__Y, (N) % 16);					\
    }))

#ifdef __x86_64__
#define _mm256_extract_epi64(X, N)					\
  (__extension__							\
   ({									\
      __m128i __Y = _mm256_extractf128_si256 ((X), (N) >> 1);		\
      _mm_extract_epi64 (__Y, (N) % 2);					\
    }))
#endif
#endif

extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_zeroall (void)
{
  __builtin_ia32_vzeroall ();
}

extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_zeroupper (void)
{
  __builtin_ia32_vzeroupper ();
}

extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_permutevar_pd (__m128d __A, __m128i __C)
{
  return (__m128d) __builtin_ia32_vpermilvarpd ((__v2df)__A,
						(__v2di)__C);
}

extern __inline __m256d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_permutevar_pd (__m256d __A, __m256i __C)
{
  return (__m256d) __builtin_ia32_vpermilvarpd256 ((__v4df)__A,
						   (__v4di)__C);
}

extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_permutevar_ps (__m128 __A, __m128i __C)
{
  return (__m128) __builtin_ia32_vpermilvarps ((__v4sf)__A,
					       (__v4si)__C);
}

extern __inline __m256 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_permutevar_ps (__m256 __A, __m256i __C)
{
  return (__m256) __builtin_ia32_vpermilvarps256 ((__v8sf)__A,
						  (__v8si)__C);
}

#ifdef __OPTIMIZE__
extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_permute_pd (__m128d __X, const int __C)
{
  return (__m128d) __builtin_ia32_vpermilpd ((__v2df)__X, __C);
}

extern __inline __m256d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_permute_pd (__m256d __X, const int __C)
{
  return (__m256d) __builtin_ia32_vpermilpd256 ((__v4df)__X, __C);
}

extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_permute_ps (__m128 __X, const int __C)
{
  return (__m128) __builtin_ia32_vpermilps ((__v4sf)__X, __C);
}

extern __inline __m256 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_permute_ps (__m256 __X, const int __C)
{
  return (__m256) __builtin_ia32_vpermilps256 ((__v8sf)__X, __C);
}
#else
#define _mm_permute_pd(X, C)						\
  ((__m128d) __builtin_ia32_vpermilpd ((__v2df)(__m128d)(X), (int)(C)))

#define _mm256_permute_pd(X, C)						\
  ((__m256d) __builtin_ia32_vpermilpd256 ((__v4df)(__m256d)(X),	(int)(C)))

#define _mm_permute_ps(X, C)						\
  ((__m128) __builtin_ia32_vpermilps ((__v4sf)(__m128)(X), (int)(C)))

#define _mm256_permute_ps(X, C)						\
  ((__m256) __builtin_ia32_vpermilps256 ((__v8sf)(__m256)(X), (int)(C)))
#endif

#ifdef __OPTIMIZE__
extern __inline __m256d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_permute2f128_pd (__m256d __X, __m256d __Y, const int __C)
{
  return (__m256d) __builtin_ia32_vperm2f128_pd256 ((__v4df)__X,
						    (__v4df)__Y,
						    __C);
}

extern __inline __m256 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_permute2f128_ps (__m256 __X, __m256 __Y, const int __C)
{
  return (__m256) __builtin_ia32_vperm2f128_ps256 ((__v8sf)__X,
						   (__v8sf)__Y,
						   __C);
}

extern __inline __m256i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_permute2f128_si256 (__m256i __X, __m256i __Y, const int __C)
{
  return (__m256i) __builtin_ia32_vperm2f128_si256 ((__v8si)__X,
						    (__v8si)__Y,
						    __C);
}
#else
#define _mm256_permute2f128_pd(X, Y, C)					\
  ((__m256d) __builtin_ia32_vperm2f128_pd256 ((__v4df)(__m256d)(X),	\
					      (__v4df)(__m256d)(Y),	\
					      (int)(C)))

#define _mm256_permute2f128_ps(X, Y, C)					\
  ((__m256) __builtin_ia32_vperm2f128_ps256 ((__v8sf)(__m256)(X),	\
					     (__v8sf)(__m256)(Y),	\
					     (int)(C)))

#define _mm256_permute2f128_si256(X, Y, C)				\
  ((__m256i) __builtin_ia32_vperm2f128_si256 ((__v8si)(__m256i)(X),	\
					      (__v8si)(__m256i)(Y),	\
					      (int)(C)))
#endif

extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_broadcast_ss (float const *__X)
{
  return (__m128) __builtin_ia32_vbroadcastss (__X);
}

extern __inline __m256d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_broadcast_sd (double const *__X)
{
  return (__m256d) __builtin_ia32_vbroadcastsd256 (__X);
}

extern __inline __m256 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_broadcast_ss (float const *__X)
{
  return (__m256) __builtin_ia32_vbroadcastss256 (__X);
}

extern __inline __m256d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_broadcast_pd (__m128d const *__X)
{
  return (__m256d) __builtin_ia32_vbroadcastf128_pd256 (__X);
}

extern __inline __m256 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_broadcast_ps (__m128 const *__X)
{
  return (__m256) __builtin_ia32_vbroadcastf128_ps256 (__X);
}

#ifdef __OPTIMIZE__
extern __inline __m256d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_insertf128_pd (__m256d __X, __m128d __Y, const int __O)
{
  return (__m256d) __builtin_ia32_vinsertf128_pd256 ((__v4df)__X,
						     (__v2df)__Y,
						     __O);
}

extern __inline __m256 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_insertf128_ps (__m256 __X, __m128 __Y, const int __O)
{
  return (__m256) __builtin_ia32_vinsertf128_ps256 ((__v8sf)__X,
						    (__v4sf)__Y,
						    __O);
}

extern __inline __m256i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_insertf128_si256 (__m256i __X, __m128i __Y, const int __O)
{
  return (__m256i) __builtin_ia32_vinsertf128_si256 ((__v8si)__X,
						     (__v4si)__Y,
						     __O);
}

extern __inline __m256i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_insert_epi32 (__m256i __X, int __D, int const __N)
{
  __m128i __Y = _mm256_extractf128_si256 (__X, __N >> 2);
  __Y = _mm_insert_epi32 (__Y, __D, __N % 4);
  return _mm256_insertf128_si256 (__X, __Y, __N >> 2);
}

extern __inline __m256i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_insert_epi16 (__m256i __X, int __D, int const __N)
{
  __m128i __Y = _mm256_extractf128_si256 (__X, __N >> 3);
  __Y = _mm_insert_epi16 (__Y, __D, __N % 8);
  return _mm256_insertf128_si256 (__X, __Y, __N >> 3);
}

extern __inline __m256i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_insert_epi8 (__m256i __X, int __D, int const __N)
{
  __m128i __Y = _mm256_extractf128_si256 (__X, __N >> 4);
  __Y = _mm_insert_epi8 (__Y, __D, __N % 16);
  return _mm256_insertf128_si256 (__X, __Y, __N >> 4);
}

#ifdef __x86_64__
extern __inline __m256i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_insert_epi64 (__m256i __X, long long __D, int const __N)
{
  __m128i __Y = _mm256_extractf128_si256 (__X, __N >> 1);
  __Y = _mm_insert_epi64 (__Y, __D, __N % 2);
  return _mm256_insertf128_si256 (__X, __Y, __N >> 1);
}
#endif
#else
#define _mm256_insertf128_pd(X, Y, O)					\
  ((__m256d) __builtin_ia32_vinsertf128_pd256 ((__v4df)(__m256d)(X),	\
					       (__v2df)(__m128d)(Y),	\
					       (int)(O)))

#define _mm256_insertf128_ps(X, Y, O)					\
  ((__m256) __builtin_ia32_vinsertf128_ps256 ((__v8sf)(__m256)(X),	\
					      (__v4sf)(__m128)(Y),  	\
					      (int)(O)))

#define _mm256_insertf128_si256(X, Y, O)				\
  ((__m256i) __builtin_ia32_vinsertf128_si256 ((__v8si)(__m256i)(X),	\
					       (__v4si)(__m128i)(Y),	\
					       (int)(O)))

#define _mm256_insert_epi32(X, D, N)					\
  (__extension__							\
   ({									\
      __m128i __Y = _mm256_extractf128_si256 ((X), (N) >> 2);		\
      __Y = _mm_insert_epi32 (__Y, (D), (N) % 4);			\
      _mm256_insertf128_si256 ((X), __Y, (N) >> 2);			\
    }))

#define _mm256_insert_epi16(X, D, N)					\
  (__extension__							\
   ({									\
      __m128i __Y = _mm256_extractf128_si256 ((X), (N) >> 3);		\
      __Y = _mm_insert_epi16 (__Y, (D), (N) % 8);			\
      _mm256_insertf128_si256 ((X), __Y, (N) >> 3);			\
    }))

#define _mm256_insert_epi8(X, D, N)					\
  (__extension__							\
   ({									\
      __m128i __Y = _mm256_extractf128_si256 ((X), (N) >> 4);		\
      __Y = _mm_insert_epi8 (__Y, (D), (N) % 16);			\
      _mm256_insertf128_si256 ((X), __Y, (N) >> 4);			\
    }))

#ifdef __x86_64__
#define _mm256_insert_epi64(X, D, N)					\
  (__extension__							\
   ({									\
      __m128i __Y = _mm256_extractf128_si256 ((X), (N) >> 1);		\
      __Y = _mm_insert_epi64 (__Y, (D), (N) % 2);			\
      _mm256_insertf128_si256 ((X), __Y, (N) >> 1);			\
    }))
#endif
#endif

extern __inline __m256d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_load_pd (double const *__P)
{
  return *(__m256d *)__P;
}

extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_store_pd (double *__P, __m256d __A)
{
  *(__m256d *)__P = __A;
}

extern __inline __m256 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_load_ps (float const *__P)
{
  return *(__m256 *)__P;
}

extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_store_ps (float *__P, __m256 __A)
{
  *(__m256 *)__P = __A;
}

extern __inline __m256d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_loadu_pd (double const *__P)
{
  return (__m256d) __builtin_ia32_loadupd256 (__P);
}

extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_storeu_pd (double *__P, __m256d __A)
{
  __builtin_ia32_storeupd256 (__P, (__v4df)__A);
}

extern __inline __m256 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_loadu_ps (float const *__P)
{
  return (__m256) __builtin_ia32_loadups256 (__P);
}

extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_storeu_ps (float *__P, __m256 __A)
{
  __builtin_ia32_storeups256 (__P, (__v8sf)__A);
}

extern __inline __m256i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_load_si256 (__m256i const *__P)
{
  return *__P;
}

extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_store_si256 (__m256i *__P, __m256i __A)
{
  *__P = __A;
}

extern __inline __m256i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_loadu_si256 (__m256i const *__P)
{
  return (__m256i) __builtin_ia32_loaddqu256 ((char const *)__P);
}

extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_storeu_si256 (__m256i *__P, __m256i __A)
{
  __builtin_ia32_storedqu256 ((char *)__P, (__v32qi)__A);
}

extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_maskload_pd (double const *__P, __m128i __M)
{
  return (__m128d) __builtin_ia32_maskloadpd ((const __v2df *)__P,
					      (__v2di)__M);
}

extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_maskstore_pd (double *__P, __m128i __M, __m128d __A)
{
  __builtin_ia32_maskstorepd ((__v2df *)__P, (__v2di)__M, (__v2df)__A);
}

extern __inline __m256d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_maskload_pd (double const *__P, __m256i __M)
{
  return (__m256d) __builtin_ia32_maskloadpd256 ((const __v4df *)__P,
						 (__v4di)__M);
}

extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_maskstore_pd (double *__P, __m256i __M, __m256d __A)
{
  __builtin_ia32_maskstorepd256 ((__v4df *)__P, (__v4di)__M, (__v4df)__A);
}

extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_maskload_ps (float const *__P, __m128i __M)
{
  return (__m128) __builtin_ia32_maskloadps ((const __v4sf *)__P,
					     (__v4si)__M);
}

extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_maskstore_ps (float *__P, __m128i __M, __m128 __A)
{
  __builtin_ia32_maskstoreps ((__v4sf *)__P, (__v4si)__M, (__v4sf)__A);
}

extern __inline __m256 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_maskload_ps (float const *__P, __m256i __M)
{
  return (__m256) __builtin_ia32_maskloadps256 ((const __v8sf *)__P,
						(__v8si)__M);
}

extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_maskstore_ps (float *__P, __m256i __M, __m256 __A)
{
  __builtin_ia32_maskstoreps256 ((__v8sf *)__P, (__v8si)__M, (__v8sf)__A);
}

extern __inline __m256 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_movehdup_ps (__m256 __X)
{
  return (__m256) __builtin_ia32_movshdup256 ((__v8sf)__X);
}

extern __inline __m256 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_moveldup_ps (__m256 __X)
{
  return (__m256) __builtin_ia32_movsldup256 ((__v8sf)__X);
}

extern __inline __m256d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_movedup_pd (__m256d __X)
{
  return (__m256d) __builtin_ia32_movddup256 ((__v4df)__X);
}

extern __inline __m256i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_lddqu_si256 (__m256i const *__P)
{
  return (__m256i) __builtin_ia32_lddqu256 ((char const *)__P);
}

extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_stream_si256 (__m256i *__A, __m256i __B)
{
  __builtin_ia32_movntdq256 ((__v4di *)__A, (__v4di)__B);
}

extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_stream_pd (double *__A, __m256d __B)
{
  __builtin_ia32_movntpd256 (__A, (__v4df)__B);
}

extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_stream_ps (float *__P, __m256 __A)
{
  __builtin_ia32_movntps256 (__P, (__v8sf)__A);
}

extern __inline __m256 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_rcp_ps (__m256 __A)
{
  return (__m256) __builtin_ia32_rcpps256 ((__v8sf)__A);
}

extern __inline __m256 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_rsqrt_ps (__m256 __A)
{
  return (__m256) __builtin_ia32_rsqrtps256 ((__v8sf)__A);
}

extern __inline __m256d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_sqrt_pd (__m256d __A)
{
  return (__m256d) __builtin_ia32_sqrtpd256 ((__v4df)__A);
}

extern __inline __m256 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_sqrt_ps (__m256 __A)
{
  return (__m256) __builtin_ia32_sqrtps256 ((__v8sf)__A);
}

#ifdef __OPTIMIZE__
extern __inline __m256d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_round_pd (__m256d __V, const int __M)
{
  return (__m256d) __builtin_ia32_roundpd256 ((__v4df)__V, __M);
}

extern __inline __m256 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_round_ps (__m256 __V, const int __M)
{
  return (__m256) __builtin_ia32_roundps256 ((__v8sf)__V, __M);
}
#else
#define _mm256_round_pd(V, M) \
  ((__m256d) __builtin_ia32_roundpd256 ((__v4df)(__m256d)(V), (int)(M)))

#define _mm256_round_ps(V, M) \
  ((__m256) __builtin_ia32_roundps256 ((__v8sf)(__m256)(V), (int)(M)))
#endif

#define _mm256_ceil_pd(V)	_mm256_round_pd ((V), _MM_FROUND_CEIL)
#define _mm256_floor_pd(V)	_mm256_round_pd ((V), _MM_FROUND_FLOOR)
#define _mm256_ceil_ps(V)	_mm256_round_ps ((V), _MM_FROUND_CEIL)
#define _mm256_floor_ps(V)	_mm256_round_ps ((V), _MM_FROUND_FLOOR)

extern __inline __m256d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_unpackhi_pd (__m256d __A, __m256d __B)
{
  return (__m256d) __builtin_ia32_unpckhpd256 ((__v4df)__A, (__v4df)__B);
}

extern __inline __m256d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_unpacklo_pd (__m256d __A, __m256d __B)
{
  return (__m256d) __builtin_ia32_unpcklpd256 ((__v4df)__A, (__v4df)__B);
}

extern __inline __m256 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_unpackhi_ps (__m256 __A, __m256 __B)
{
  return (__m256) __builtin_ia32_unpckhps256 ((__v8sf)__A, (__v8sf)__B);
}

extern __inline __m256 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_unpacklo_ps (__m256 __A, __m256 __B)
{
  return (__m256) __builtin_ia32_unpcklps256 ((__v8sf)__A, (__v8sf)__B);
}

extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_testz_pd (__m128d __M, __m128d __V)
{
  return __builtin_ia32_vtestzpd ((__v2df)__M, (__v2df)__V);
}

extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_testc_pd (__m128d __M, __m128d __V)
{
  return __builtin_ia32_vtestcpd ((__v2df)__M, (__v2df)__V);
}

extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_testnzc_pd (__m128d __M, __m128d __V)
{
  return __builtin_ia32_vtestnzcpd ((__v2df)__M, (__v2df)__V);
}

extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_testz_ps (__m128 __M, __m128 __V)
{
  return __builtin_ia32_vtestzps ((__v4sf)__M, (__v4sf)__V);
}

extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_testc_ps (__m128 __M, __m128 __V)
{
  return __builtin_ia32_vtestcps ((__v4sf)__M, (__v4sf)__V);
}

extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_testnzc_ps (__m128 __M, __m128 __V)
{
  return __builtin_ia32_vtestnzcps ((__v4sf)__M, (__v4sf)__V);
}

extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_testz_pd (__m256d __M, __m256d __V)
{
  return __builtin_ia32_vtestzpd256 ((__v4df)__M, (__v4df)__V);
}

extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_testc_pd (__m256d __M, __m256d __V)
{
  return __builtin_ia32_vtestcpd256 ((__v4df)__M, (__v4df)__V);
}

extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_testnzc_pd (__m256d __M, __m256d __V)
{
  return __builtin_ia32_vtestnzcpd256 ((__v4df)__M, (__v4df)__V);
}

extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_testz_ps (__m256 __M, __m256 __V)
{
  return __builtin_ia32_vtestzps256 ((__v8sf)__M, (__v8sf)__V);
}

extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_testc_ps (__m256 __M, __m256 __V)
{
  return __builtin_ia32_vtestcps256 ((__v8sf)__M, (__v8sf)__V);
}

extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_testnzc_ps (__m256 __M, __m256 __V)
{
  return __builtin_ia32_vtestnzcps256 ((__v8sf)__M, (__v8sf)__V);
}

extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_testz_si256 (__m256i __M, __m256i __V)
{
  return __builtin_ia32_ptestz256 ((__v4di)__M, (__v4di)__V);
}

extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_testc_si256 (__m256i __M, __m256i __V)
{
  return __builtin_ia32_ptestc256 ((__v4di)__M, (__v4di)__V);
}

extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_testnzc_si256 (__m256i __M, __m256i __V)
{
  return __builtin_ia32_ptestnzc256 ((__v4di)__M, (__v4di)__V);
}

extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_movemask_pd (__m256d __A)
{
  return __builtin_ia32_movmskpd256 ((__v4df)__A);
}

extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_movemask_ps (__m256 __A)
{
  return __builtin_ia32_movmskps256 ((__v8sf)__A);
}

extern __inline __m256d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_setzero_pd (void)
{
  return __extension__ (__m256d){ 0.0, 0.0, 0.0, 0.0 };
}

extern __inline __m256 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_setzero_ps (void)
{
  return __extension__ (__m256){ 0.0, 0.0, 0.0, 0.0,
				 0.0, 0.0, 0.0, 0.0 };
}

extern __inline __m256i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_setzero_si256 (void)
{
  return __extension__ (__m256i)(__v4di){ 0, 0, 0, 0 };
}

/* Create the vector [A B C D].  */
extern __inline __m256d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_set_pd (double __A, double __B, double __C, double __D)
{
  return __extension__ (__m256d){ __D, __C, __B, __A };
}

/* Create the vector [A B C D E F G H].  */
extern __inline __m256 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_set_ps (float __A, float __B, float __C, float __D,
	       float __E, float __F, float __G, float __H)
{
  return __extension__ (__m256){ __H, __G, __F, __E,
				 __D, __C, __B, __A };
}

/* Create the vector [A B C D E F G H].  */
extern __inline __m256i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_set_epi32 (int __A, int __B, int __C, int __D,
		  int __E, int __F, int __G, int __H)
{
  return __extension__ (__m256i)(__v8si){ __H, __G, __F, __E,
					  __D, __C, __B, __A };
}

extern __inline __m256i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_set_epi16 (short __q15, short __q14, short __q13, short __q12,
		  short __q11, short __q10, short __q09, short __q08,
		  short __q07, short __q06, short __q05, short __q04,
		  short __q03, short __q02, short __q01, short __q00)
{
  return __extension__ (__m256i)(__v16hi){
    __q00, __q01, __q02, __q03, __q04, __q05, __q06, __q07,
    __q08, __q09, __q10, __q11, __q12, __q13, __q14, __q15
  };
}

extern __inline __m256i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_set_epi8  (char __q31, char __q30, char __q29, char __q28,
		  char __q27, char __q26, char __q25, char __q24,
		  char __q23, char __q22, char __q21, char __q20,
		  char __q19, char __q18, char __q17, char __q16,
		  char __q15, char __q14, char __q13, char __q12,
		  char __q11, char __q10, char __q09, char __q08,
		  char __q07, char __q06, char __q05, char __q04,
		  char __q03, char __q02, char __q01, char __q00)
{
  return __extension__ (__m256i)(__v32qi){
    __q00, __q01, __q02, __q03, __q04, __q05, __q06, __q07,
    __q08, __q09, __q10, __q11, __q12, __q13, __q14, __q15,
    __q16, __q17, __q18, __q19, __q20, __q21, __q22, __q23,
    __q24, __q25, __q26, __q27, __q28, __q29, __q30, __q31
  };
}

extern __inline __m256i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_set_epi64x (long long __A, long long __B, long long __C,
		   long long __D)
{
  return __extension__ (__m256i)(__v4di){ __D, __C, __B, __A };
}

/* Create a vector with all elements equal to A.  */
extern __inline __m256d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_set1_pd (double __A)
{
  return __extension__ (__m256d){ __A, __A, __A, __A };
}

/* Create a vector with all elements equal to A.  */
extern __inline __m256 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_set1_ps (float __A)
{
  return __extension__ (__m256){ __A, __A, __A, __A,
				 __A, __A, __A, __A };
}

/* Create a vector with all elements equal to A.  */
extern __inline __m256i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_set1_epi32 (int __A)
{
  return __extension__ (__m256i)(__v8si){ __A, __A, __A, __A,
					  __A, __A, __A, __A };
}

extern __inline __m256i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_set1_epi16 (short __A)
{
  return _mm256_set_epi16 (__A, __A, __A, __A, __A, __A, __A, __A,
			   __A, __A, __A, __A, __A, __A, __A, __A);
}

extern __inline __m256i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_set1_epi8 (char __A)
{
  return _mm256_set_epi8 (__A, __A, __A, __A, __A, __A, __A, __A,
			  __A, __A, __A, __A, __A, __A, __A, __A,
			  __A, __A, __A, __A, __A, __A, __A, __A,
			  __A, __A, __A, __A, __A, __A, __A, __A);
}

extern __inline __m256i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_set1_epi64x (long long __A)
{
  return __extension__ (__m256i)(__v4di){ __A, __A, __A, __A };
}

/* Create vectors of elements in the reversed order from the
   _mm256_set_XXX functions.  */

extern __inline __m256d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_setr_pd (double __A, double __B, double __C, double __D)
{
  return _mm256_set_pd (__D, __C, __B, __A);
}

extern __inline __m256 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_setr_ps (float __A, float __B, float __C, float __D,
		float __E, float __F, float __G, float __H)
{
  return _mm256_set_ps (__H, __G, __F, __E, __D, __C, __B, __A);
}

extern __inline __m256i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_setr_epi32 (int __A, int __B, int __C, int __D,
		   int __E, int __F, int __G, int __H)
{
  return _mm256_set_epi32 (__H, __G, __F, __E, __D, __C, __B, __A);
}

extern __inline __m256i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_setr_epi16 (short __q15, short __q14, short __q13, short __q12,
		   short __q11, short __q10, short __q09, short __q08,
		   short __q07, short __q06, short __q05, short __q04,
		   short __q03, short __q02, short __q01, short __q00)
{
  return _mm256_set_epi16 (__q00, __q01, __q02, __q03,
			   __q04, __q05, __q06, __q07,
			   __q08, __q09, __q10, __q11,
			   __q12, __q13, __q14, __q15);
}

extern __inline __m256i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_setr_epi8  (char __q31, char __q30, char __q29, char __q28,
		   char __q27, char __q26, char __q25, char __q24,
		   char __q23, char __q22, char __q21, char __q20,
		   char __q19, char __q18, char __q17, char __q16,
		   char __q15, char __q14, char __q13, char __q12,
		   char __q11, char __q10, char __q09, char __q08,
		   char __q07, char __q06, char __q05, char __q04,
		   char __q03, char __q02, char __q01, char __q00)
{
  return _mm256_set_epi8 (__q00, __q01, __q02, __q03,
			  __q04, __q05, __q06, __q07,
			  __q08, __q09, __q10, __q11,
			  __q12, __q13, __q14, __q15,
			  __q16, __q17, __q18, __q19,
			  __q20, __q21, __q22, __q23,
			  __q24, __q25, __q26, __q27,
			  __q28, __q29, __q30, __q31);
}

extern __inline __m256i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_setr_epi64x (long long __A, long long __B, long long __C,
		    long long __D)
{
  return _mm256_set_epi64x (__D, __C, __B, __A);
}

/* Casts between various SP, DP, INT vector types.  Note that these do no
   conversion of values, they just change the type.  */
extern __inline __m256 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_castpd_ps (__m256d __A)
{
  return (__m256) __A;
}

extern __inline __m256i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_castpd_si256 (__m256d __A)
{
  return (__m256i) __A;
}

extern __inline __m256d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_castps_pd (__m256 __A)
{
  return (__m256d) __A;
}

extern __inline __m256i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_castps_si256(__m256 __A)
{
  return (__m256i) __A;
}

extern __inline __m256 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_castsi256_ps (__m256i __A)
{
  return (__m256) __A;
}

extern __inline __m256d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_castsi256_pd (__m256i __A)
{
  return (__m256d) __A;
}

extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_castpd256_pd128 (__m256d __A)
{
  return (__m128d) __builtin_ia32_pd_pd256 ((__v4df)__A);
}

extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_castps256_ps128 (__m256 __A)
{
  return (__m128) __builtin_ia32_ps_ps256 ((__v8sf)__A);
}

extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_castsi256_si128 (__m256i __A)
{
  return (__m128i) __builtin_ia32_si_si256 ((__v8si)__A);
}

/* When cast is done from a 128 to 256-bit type, the low 128 bits of
   the 256-bit result contain source parameter value and the upper 128
   bits of the result are undefined.  Those intrinsics shouldn't
   generate any extra moves.  */

extern __inline __m256d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_castpd128_pd256 (__m128d __A)
{
  return (__m256d) __builtin_ia32_pd256_pd ((__v2df)__A);
}

extern __inline __m256 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_castps128_ps256 (__m128 __A)
{
  return (__m256) __builtin_ia32_ps256_ps ((__v4sf)__A);
}

extern __inline __m256i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_castsi128_si256 (__m128i __A)
{
  return (__m256i) __builtin_ia32_si256_si ((__v4si)__A);
}

Youez - 2016 - github.com/yon3zu
LinuXploit