12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
71278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392139313941395139613971398139914001401140214031404140514061407140814091410141114121413141414151416141714181419142014211422142314241425142614271428142914301431143214331434143514361437143814391440144114421443144414451446144714481449145014511452145314541455145614571458145914601461146214631464146514661467146814691470147114721473147414751476147714781479148014811482148314841485148614871488148914901491149214931494149514961497149814991500150115021503150415051506150715081509151015111512151315141515151615171518151915201521152215231524152515261527152815291530153115321533153415351536153715381539154015411542154315441545154615471548154915501551155215531554155515561557155815591560156115621563156415651566156715681569157015711572157315741575157615771578157915801581158215831584158515861587158815891590159115921593159415951596159715981599160016011602160316041605160616071608160916101611161216131614161516161617161816191620162116221623162416251626162716281629163016311632163316341635163616371638163916401641164216431644164516461647164816491650165116521653165416551656165716581659166016611662166316641665166616671668166916701671167216731674167516761677167816791680168116821683168416851686168716881689169016911692169316941695169616971698169917001701170217031704170517061707170817091710171117121713171417151716171717181719172017211722172317241725172617271728172917301731173217331734173517361737173817391740174117421743174417451746174717481749175017511752175317541755175617571758175917601761176217631764176517661767176817691770177117721773177417751776177
71778177917801781178217831784178517861787178817891790179117921793179417951796179717981799180018011802180318041805180618071808180918101811181218131814181518161817181818191820182118221823182418251826182718281829183018311832183318341835183618371838183918401841184218431844184518461847184818491850185118521853185418551856185718581859186018611862186318641865186618671868186918701871187218731874187518761877187818791880188118821883188418851886188718881889189018911892189318941895189618971898189919001901190219031904190519061907190819091910191119121913191419151916191719181919192019211922192319241925192619271928192919301931193219331934193519361937193819391940194119421943194419451946194719481949195019511952195319541955195619571958195919601961196219631964196519661967196819691970197119721973197419751976197719781979198019811982198319841985198619871988198919901991199219931994199519961997199819992000200120022003200420052006200720082009201020112012201320142015201620172018201920202021202220232024202520262027202820292030203120322033203420352036203720382039204020412042204320442045204620472048204920502051205220532054205520562057205820592060206120622063206420652066206720682069207020712072207320742075207620772078207920802081208220832084208520862087208820892090209120922093209420952096209720982099210021012102210321042105210621072108210921102111211221132114211521162117211821192120212121222123212421252126212721282129213021312132213321342135213621372138213921402141214221432144214521462147214821492150215121522153215421552156215721582159216021612162216321642165216621672168216921702171217221732174217521762177217821792180218121822183218421852186218721882189219021912192219321942195219621972198219922002201220222032204220522062207220822092210221122122213221422152216221722182219222022212222222322242225222622272228222922302231223222332234223522362237223822392240224122422243224422452246224722482249225022512252225322542255225622572258225922602261226222632264226522662267226822692270227122722273227422752276227
72278227922802281228222832284228522862287228822892290229122922293229422952296229722982299230023012302230323042305230623072308230923102311231223132314231523162317231823192320232123222323232423252326232723282329233023312332233323342335233623372338233923402341234223432344234523462347234823492350235123522353235423552356235723582359236023612362236323642365236623672368236923702371237223732374237523762377237823792380238123822383238423852386238723882389239023912392239323942395239623972398239924002401240224032404240524062407240824092410241124122413241424152416241724182419242024212422242324242425242624272428242924302431243224332434243524362437243824392440244124422443244424452446244724482449245024512452245324542455245624572458245924602461246224632464246524662467246824692470247124722473247424752476247724782479248024812482248324842485248624872488248924902491249224932494249524962497249824992500250125022503250425052506250725082509251025112512251325142515251625172518251925202521252225232524252525262527252825292530253125322533253425352536253725382539254025412542254325442545254625472548254925502551255225532554255525562557255825592560256125622563256425652566256725682569257025712572257325742575257625772578257925802581258225832584258525862587258825892590259125922593259425952596259725982599260026012602260326042605260626072608260926102611261226132614261526162617261826192620262126222623262426252626262726282629263026312632263326342635263626372638263926402641264226432644264526462647264826492650265126522653265426552656265726582659266026612662266326642665266626672668266926702671267226732674267526762677267826792680268126822683268426852686268726882689269026912692269326942695269626972698269927002701270227032704270527062707270827092710271127122713271427152716271727182719272027212722272327242725272627272728272927302731273227332734273527362737273827392740274127422743274427452746274727482749275027512752275327542755275627572758275927602761276227632764276527662767276827692770277127722773277427752776277
72778277927802781278227832784278527862787278827892790279127922793279427952796279727982799280028012802280328042805280628072808280928102811281228132814281528162817281828192820282128222823282428252826282728282829283028312832283328342835283628372838283928402841284228432844284528462847284828492850285128522853285428552856285728582859286028612862286328642865286628672868286928702871287228732874287528762877287828792880288128822883288428852886288728882889289028912892289328942895289628972898289929002901290229032904290529062907290829092910291129122913291429152916291729182919292029212922292329242925292629272928292929302931293229332934293529362937293829392940294129422943294429452946294729482949295029512952295329542955295629572958295929602961296229632964296529662967296829692970297129722973297429752976297729782979298029812982298329842985298629872988298929902991299229932994299529962997299829993000300130023003300430053006300730083009301030113012301330143015301630173018301930203021302230233024302530263027302830293030303130323033303430353036303730383039304030413042304330443045304630473048304930503051305230533054305530563057305830593060306130623063306430653066306730683069307030713072307330743075307630773078307930803081308230833084308530863087308830893090309130923093309430953096309730983099310031013102310331043105310631073108310931103111311231133114311531163117311831193120312131223123312431253126312731283129313031313132313331343135313631373138313931403141314231433144314531463147314831493150315131523153315431553156315731583159316031613162316331643165316631673168316931703171317231733174317531763177317831793180318131823183318431853186318731883189319031913192319331943195319631973198319932003201320232033204320532063207320832093210321132123213321432153216321732183219322032213222322332243225322632273228322932303231323232333234323532363237323832393240324132423243324432453246324732483249325032513252325332543255325632573258325932603261326232633264326532663267326832693270327132723273327432753276327
73278327932803281328232833284328532863287328832893290329132923293329432953296329732983299330033013302330333043305330633073308330933103311331233133314331533163317331833193320332133223323332433253326332733283329333033313332333333343335333633373338333933403341334233433344334533463347334833493350335133523353335433553356335733583359336033613362336333643365336633673368336933703371337233733374337533763377337833793380338133823383338433853386338733883389339033913392339333943395339633973398339934003401340234033404340534063407340834093410341134123413341434153416341734183419342034213422342334243425342634273428342934303431343234333434343534363437343834393440344134423443344434453446344734483449345034513452345334543455345634573458345934603461346234633464346534663467346834693470347134723473347434753476347734783479348034813482348334843485348634873488348934903491349234933494349534963497349834993500350135023503350435053506350735083509351035113512351335143515351635173518351935203521352235233524352535263527352835293530353135323533353435353536353735383539354035413542354335443545354635473548354935503551355235533554355535563557355835593560356135623563356435653566356735683569357035713572357335743575357635773578357935803581358235833584358535863587358835893590359135923593359435953596359735983599360036013602360336043605360636073608360936103611361236133614361536163617361836193620362136223623362436253626362736283629363036313632363336343635363636373638363936403641364236433644364536463647364836493650365136523653365436553656365736583659366036613662366336643665366636673668366936703671367236733674367536763677367836793680368136823683368436853686368736883689369036913692369336943695369636973698369937003701370237033704370537063707370837093710371137123713371437153716371737183719372037213722372337243725372637273728372937303731373237333734373537363737373837393740374137423743374437453746374737483749375037513752375337543755375637573758375937603761376237633764376537663767376837693770377137723773377437753776377
73778377937803781378237833784378537863787378837893790379137923793379437953796379737983799380038013802380338043805380638073808380938103811381238133814381538163817381838193820382138223823382438253826382738283829383038313832383338343835383638373838383938403841384238433844384538463847384838493850385138523853385438553856385738583859386038613862386338643865386638673868386938703871387238733874387538763877387838793880388138823883388438853886388738883889389038913892389338943895389638973898389939003901390239033904390539063907390839093910391139123913391439153916391739183919392039213922392339243925392639273928392939303931393239333934393539363937393839393940394139423943394439453946394739483949395039513952395339543955395639573958395939603961396239633964396539663967396839693970397139723973397439753976397739783979398039813982398339843985398639873988398939903991399239933994399539963997399839994000400140024003400440054006400740084009401040114012401340144015401640174018401940204021402240234024402540264027402840294030403140324033403440354036403740384039404040414042404340444045404640474048404940504051405240534054405540564057405840594060406140624063406440654066406740684069407040714072407340744075407640774078407940804081408240834084408540864087408840894090409140924093409440954096409740984099410041014102410341044105410641074108410941104111411241134114411541164117411841194120412141224123412441254126412741284129413041314132413341344135413641374138413941404141414241434144414541464147414841494150415141524153415441554156415741584159416041614162416341644165416641674168416941704171417241734174417541764177417841794180418141824183418441854186418741884189419041914192419341944195419641974198419942004201420242034204420542064207420842094210421142124213421442154216421742184219422042214222422342244225422642274228422942304231423242334234423542364237423842394240424142424243424442454246424742484249425042514252425342544255425642574258425942604261426242634264426542664267426842694270427142724273427442754276427
74278427942804281428242834284428542864287428842894290429142924293429442954296429742984299430043014302430343044305430643074308430943104311431243134314431543164317431843194320432143224323432443254326432743284329433043314332433343344335433643374338433943404341434243434344434543464347434843494350435143524353435443554356435743584359436043614362436343644365436643674368436943704371437243734374437543764377437843794380438143824383438443854386438743884389439043914392439343944395439643974398439944004401440244034404440544064407440844094410441144124413441444154416441744184419442044214422442344244425442644274428442944304431443244334434443544364437443844394440444144424443444444454446444744484449445044514452445344544455445644574458445944604461446244634464446544664467446844694470447144724473447444754476447744784479448044814482448344844485448644874488448944904491449244934494449544964497449844994500450145024503450445054506450745084509451045114512451345144515451645174518451945204521452245234524452545264527452845294530453145324533453445354536453745384539454045414542454345444545454645474548454945504551455245534554455545564557455845594560456145624563456445654566456745684569457045714572457345744575457645774578457945804581458245834584458545864587458845894590459145924593459445954596459745984599460046014602460346044605460646074608460946104611461246134614461546164617461846194620462146224623462446254626462746284629463046314632463346344635463646374638463946404641464246434644464546464647464846494650465146524653465446554656465746584659466046614662466346644665466646674668466946704671467246734674467546764677467846794680468146824683468446854686468746884689469046914692469346944695469646974698469947004701470247034704470547064707470847094710471147124713471447154716471747184719472047214722472347244725472647274728472947304731473247334734473547364737473847394740474147424743474447454746474747484749475047514752475347544755475647574758475947604761476247634764476547664767476847694770477147724773477447754776477
74778477947804781478247834784478547864787478847894790479147924793479447954796479747984799480048014802480348044805480648074808480948104811481248134814481548164817481848194820482148224823482448254826482748284829483048314832483348344835483648374838483948404841484248434844484548464847484848494850485148524853485448554856485748584859486048614862486348644865486648674868486948704871487248734874487548764877487848794880488148824883488448854886488748884889489048914892489348944895489648974898489949004901490249034904490549064907490849094910491149124913491449154916491749184919492049214922492349244925492649274928492949304931493249334934493549364937493849394940494149424943494449454946494749484949495049514952495349544955495649574958495949604961496249634964496549664967496849694970497149724973497449754976497749784979498049814982498349844985498649874988498949904991499249934994499549964997499849995000500150025003500450055006500750085009501050115012501350145015501650175018501950205021502250235024502550265027502850295030503150325033503450355036503750385039504050415042504350445045504650475048504950505051505250535054505550565057505850595060506150625063506450655066506750685069507050715072507350745075507650775078507950805081508250835084508550865087508850895090509150925093509450955096509750985099510051015102510351045105510651075108510951105111511251135114511551165117511851195120512151225123512451255126512751285129513051315132513351345135513651375138513951405141514251435144514551465147514851495150515151525153515451555156515751585159516051615162516351645165516651675168516951705171517251735174517551765177517851795180518151825183518451855186518751885189519051915192519351945195519651975198519952005201520252035204520552065207520852095210521152125213521452155216521752185219522052215222522352245225522652275228522952305231523252335234523552365237523852395240524152425243524452455246524752485249525052515252525352545255525652575258525952605261526252635264526552665267526852695270527152725273527452755276527
75278527952805281528252835284528552865287528852895290529152925293529452955296529752985299530053015302530353045305530653075308530953105311531253135314531553165317531853195320532153225323532453255326532753285329533053315332533353345335533653375338533953405341534253435344534553465347534853495350535153525353535453555356535753585359536053615362536353645365536653675368536953705371537253735374537553765377537853795380538153825383538453855386538753885389539053915392539353945395539653975398539954005401540254035404540554065407540854095410541154125413541454155416541754185419542054215422542354245425542654275428542954305431543254335434543554365437543854395440544154425443544454455446544754485449545054515452545354545455545654575458545954605461546254635464546554665467546854695470547154725473547454755476547754785479548054815482548354845485548654875488548954905491549254935494549554965497549854995500550155025503550455055506550755085509551055115512551355145515551655175518551955205521552255235524552555265527552855295530553155325533553455355536553755385539554055415542554355445545554655475548554955505551555255535554555555565557555855595560556155625563556455655566556755685569557055715572557355745575557655775578557955805581558255835584558555865587558855895590559155925593559455955596559755985599560056015602560356045605560656075608560956105611561256135614561556165617561856195620562156225623562456255626562756285629563056315632563356345635563656375638563956405641564256435644564556465647564856495650565156525653565456555656565756585659566056615662566356645665566656675668566956705671567256735674567556765677567856795680568156825683568456855686568756885689569056915692569356945695569656975698569957005701570257035704570557065707570857095710571157125713571457155716571757185719572057215722572357245725572657275728572957305731573257335734573557365737573857395740574157425743574457455746574757485749575057515752575357545755575657575758575957605761576257635764576557665767576857695770577157725773577457755776577
75778577957805781578257835784578557865787578857895790579157925793579457955796579757985799580058015802580358045805580658075808580958105811581258135814581558165817581858195820582158225823582458255826582758285829583058315832583358345835583658375838583958405841584258435844584558465847584858495850585158525853585458555856585758585859586058615862586358645865586658675868586958705871587258735874587558765877587858795880588158825883588458855886588758885889589058915892589358945895589658975898589959005901590259035904590559065907590859095910591159125913591459155916591759185919592059215922592359245925592659275928592959305931593259335934593559365937593859395940594159425943594459455946594759485949595059515952595359545955595659575958595959605961596259635964596559665967596859695970597159725973597459755976597759785979598059815982598359845985598659875988598959905991599259935994599559965997599859996000600160026003600460056006600760086009601060116012601360146015601660176018601960206021602260236024602560266027602860296030603160326033603460356036603760386039604060416042604360446045604660476048604960506051605260536054605560566057605860596060606160626063606460656066606760686069607060716072607360746075607660776078607960806081608260836084608560866087608860896090609160926093609460956096609760986099610061016102610361046105610661076108610961106111611261136114611561166117611861196120612161226123612461256126612761286129613061316132613361346135613661376138613961406141614261436144614561466147614861496150615161526153615461556156615761586159616061616162616361646165616661676168616961706171617261736174617561766177617861796180618161826183618461856186618761886189619061916192619361946195619661976198619962006201620262036204620562066207620862096210621162126213621462156216621762186219622062216222622362246225622662276228622962306231623262336234623562366237623862396240624162426243624462456246624762486249625062516252625362546255625662576258625962606261626262636264626562666267626862696270627162726273627462756276627
76278627962806281628262836284628562866287628862896290629162926293629462956296629762986299630063016302630363046305630663076308630963106311631263136314631563166317631863196320632163226323632463256326632763286329633063316332633363346335633663376338633963406341634263436344634563466347634863496350635163526353635463556356635763586359636063616362636363646365636663676368636963706371637263736374637563766377637863796380638163826383638463856386638763886389639063916392639363946395639663976398639964006401640264036404640564066407640864096410641164126413641464156416641764186419642064216422642364246425642664276428642964306431643264336434643564366437643864396440644164426443644464456446644764486449645064516452645364546455645664576458645964606461646264636464646564666467646864696470647164726473647464756476647764786479648064816482648364846485648664876488648964906491649264936494649564966497649864996500650165026503650465056506650765086509651065116512651365146515651665176518651965206521652265236524652565266527652865296530653165326533653465356536653765386539654065416542654365446545654665476548654965506551655265536554655565566557655865596560656165626563656465656566656765686569657065716572657365746575657665776578657965806581658265836584658565866587658865896590659165926593659465956596659765986599660066016602660366046605660666076608660966106611661266136614661566166617661866196620662166226623662466256626662766286629663066316632663366346635663666376638663966406641664266436644664566466647664866496650665166526653665466556656665766586659666066616662666366646665666666676668666966706671667266736674667566766677667866796680668166826683668466856686668766886689669066916692669366946695669666976698669967006701670267036704670567066707670867096710671167126713671467156716671767186719672067216722672367246725672667276728672967306731673267336734673567366737673867396740674167426743674467456746674767486749675067516752675367546755675667576758675967606761676267636764676567666767676867696770677167726773677467756776677
76778677967806781678267836784678567866787678867896790679167926793679467956796679767986799680068016802680368046805680668076808680968106811681268136814681568166817681868196820682168226823682468256826682768286829683068316832683368346835683668376838683968406841684268436844684568466847684868496850685168526853685468556856685768586859686068616862686368646865686668676868686968706871687268736874687568766877687868796880688168826883688468856886688768886889689068916892689368946895689668976898689969006901690269036904690569066907690869096910691169126913691469156916691769186919692069216922692369246925692669276928692969306931693269336934693569366937693869396940694169426943694469456946694769486949695069516952695369546955695669576958695969606961696269636964696569666967696869696970697169726973697469756976697769786979698069816982698369846985698669876988698969906991699269936994699569966997699869997000700170027003700470057006700770087009701070117012701370147015701670177018701970207021702270237024702570267027702870297030703170327033703470357036703770387039704070417042704370447045704670477048704970507051705270537054705570567057705870597060706170627063706470657066706770687069707070717072707370747075707670777078707970807081708270837084708570867087708870897090709170927093709470957096709770987099710071017102710371047105710671077108710971107111711271137114711571167117711871197120712171227123712471257126712771287129713071317132713371347135713671377138713971407141714271437144714571467147714871497150715171527153715471557156715771587159716071617162716371647165716671677168716971707171717271737174717571767177717871797180718171827183718471857186718771887189719071917192719371947195719671977198719972007201720272037204720572067207720872097210721172127213721472157216721772187219722072217222722372247225722672277228722972307231723272337234723572367237723872397240724172427243724472457246724772487249725072517252725372547255725672577258725972607261726272637264726572667267726872697270727172727273727472757276727
77278727972807281728272837284728572867287728872897290729172927293729472957296729772987299730073017302730373047305730673077308730973107311731273137314731573167317731873197320732173227323732473257326732773287329733073317332733373347335733673377338733973407341734273437344734573467347734873497350735173527353735473557356735773587359736073617362736373647365736673677368736973707371737273737374737573767377737873797380738173827383738473857386738773887389739073917392739373947395739673977398739974007401740274037404740574067407740874097410741174127413741474157416741774187419742074217422742374247425742674277428742974307431743274337434743574367437743874397440744174427443744474457446744774487449745074517452745374547455745674577458745974607461746274637464746574667467746874697470747174727473747474757476747774787479748074817482748374847485748674877488748974907491749274937494749574967497749874997500750175027503750475057506750775087509751075117512751375147515751675177518751975207521752275237524752575267527752875297530753175327533753475357536753775387539754075417542754375447545754675477548754975507551755275537554755575567557755875597560756175627563756475657566756775687569757075717572757375747575757675777578757975807581758275837584758575867587758875897590759175927593759475957596759775987599760076017602760376047605760676077608760976107611761276137614761576167617761876197620762176227623762476257626762776287629763076317632763376347635763676377638763976407641764276437644764576467647764876497650765176527653765476557656765776587659766076617662766376647665766676677668766976707671767276737674767576767677767876797680768176827683768476857686768776887689769076917692769376947695769676977698769977007701770277037704770577067707770877097710771177127713771477157716771777187719772077217722772377247725772677277728772977307731773277337734773577367737773877397740774177427743774477457746774777487749775077517752775377547755775677577758775977607761776277637764776577667767776877697770777177727773777477757776777
77778777977807781778277837784778577867787778877897790779177927793779477957796779777987799780078017802780378047805780678077808780978107811781278137814781578167817781878197820782178227823782478257826782778287829783078317832783378347835783678377838783978407841784278437844784578467847784878497850785178527853785478557856785778587859786078617862786378647865786678677868786978707871787278737874787578767877787878797880788178827883788478857886788778887889789078917892789378947895789678977898789979007901790279037904790579067907790879097910791179127913791479157916791779187919792079217922792379247925792679277928792979307931793279337934793579367937793879397940794179427943794479457946794779487949795079517952795379547955795679577958795979607961796279637964796579667967796879697970797179727973797479757976797779787979798079817982798379847985798679877988798979907991799279937994799579967997799879998000800180028003800480058006800780088009801080118012801380148015801680178018801980208021802280238024802580268027802880298030803180328033803480358036803780388039804080418042804380448045804680478048804980508051805280538054805580568057805880598060806180628063806480658066806780688069807080718072807380748075807680778078807980808081808280838084808580868087808880898090809180928093809480958096809780988099810081018102810381048105810681078108810981108111811281138114811581168117811881198120812181228123812481258126812781288129813081318132813381348135813681378138813981408141814281438144814581468147814881498150815181528153815481558156815781588159816081618162816381648165816681678168816981708171817281738174817581768177817881798180818181828183818481858186818781888189819081918192819381948195819681978198819982008201820282038204820582068207820882098210821182128213821482158216821782188219822082218222822382248225822682278228822982308231823282338234823582368237823882398240824182428243824482458246824782488249825082518252825382548255825682578258825982608261826282638264826582668267826882698270827182728273827482758276827
78278827982808281828282838284828582868287828882898290829182928293829482958296829782988299830083018302830383048305830683078308830983108311831283138314831583168317831883198320832183228323832483258326832783288329833083318332833383348335833683378338833983408341834283438344834583468347834883498350835183528353835483558356835783588359836083618362836383648365836683678368836983708371837283738374837583768377837883798380838183828383838483858386838783888389839083918392839383948395839683978398839984008401840284038404840584068407840884098410841184128413841484158416841784188419842084218422842384248425842684278428842984308431843284338434843584368437843884398440844184428443844484458446844784488449845084518452845384548455845684578458845984608461846284638464846584668467846884698470847184728473847484758476847784788479848084818482848384848485848684878488848984908491849284938494849584968497849884998500850185028503850485058506850785088509851085118512851385148515851685178518851985208521852285238524852585268527852885298530853185328533853485358536853785388539854085418542854385448545854685478548854985508551855285538554855585568557855885598560856185628563856485658566856785688569857085718572857385748575857685778578857985808581858285838584858585868587858885898590859185928593859485958596859785988599860086018602860386048605860686078608860986108611861286138614861586168617861886198620862186228623862486258626862786288629863086318632863386348635863686378638863986408641864286438644864586468647864886498650865186528653865486558656865786588659866086618662866386648665866686678668866986708671867286738674867586768677867886798680868186828683868486858686868786888689869086918692869386948695869686978698869987008701870287038704870587068707870887098710871187128713871487158716871787188719872087218722872387248725872687278728872987308731873287338734873587368737873887398740874187428743874487458746874787488749875087518752875387548755875687578758875987608761876287638764876587668767876887698770877187728773877487758776877
78778877987808781878287838784878587868787878887898790879187928793879487958796879787988799880088018802880388048805880688078808880988108811881288138814881588168817881888198820882188228823882488258826882788288829883088318832883388348835883688378838883988408841884288438844884588468847884888498850885188528853885488558856885788588859886088618862886388648865886688678868886988708871887288738874887588768877887888798880888188828883888488858886888788888889889088918892889388948895889688978898889989008901890289038904890589068907890889098910891189128913891489158916891789188919892089218922892389248925892689278928892989308931893289338934893589368937893889398940894189428943894489458946894789488949895089518952895389548955895689578958895989608961896289638964896589668967896889698970897189728973897489758976897789788979898089818982898389848985898689878988898989908991899289938994899589968997899889999000900190029003900490059006900790089009901090119012901390149015901690179018901990209021902290239024902590269027902890299030903190329033903490359036903790389039904090419042904390449045904690479048904990509051905290539054905590569057905890599060906190629063906490659066906790689069907090719072907390749075907690779078907990809081908290839084908590869087908890899090909190929093909490959096909790989099910091019102910391049105910691079108910991109111911291139114911591169117911891199120912191229123912491259126912791289129913091319132913391349135913691379138913991409141914291439144914591469147914891499150915191529153915491559156915791589159916091619162916391649165916691679168916991709171917291739174917591769177917891799180918191829183918491859186918791889189919091919192919391949195919691979198919992009201920292039204920592069207920892099210921192129213921492159216921792189219922092219222922392249225922692279228922992309231923292339234923592369237923892399240924192429243924492459246924792489249925092519252925392549255925692579258925992609261926292639264926592669267926892699270927192729273927492759276927
79278927992809281928292839284928592869287928892899290929192929293929492959296929792989299930093019302930393049305930693079308930993109311931293139314931593169317931893199320932193229323932493259326932793289329933093319332933393349335933693379338933993409341934293439344934593469347934893499350935193529353935493559356935793589359936093619362936393649365936693679368936993709371937293739374937593769377937893799380938193829383938493859386938793889389939093919392939393949395939693979398939994009401940294039404940594069407940894099410941194129413941494159416941794189419942094219422942394249425942694279428942994309431943294339434943594369437943894399440944194429443944494459446944794489449945094519452945394549455945694579458945994609461946294639464946594669467946894699470947194729473947494759476947794789479948094819482948394849485948694879488948994909491949294939494949594969497949894999500950195029503950495059506950795089509951095119512951395149515951695179518951995209521952295239524952595269527952895299530953195329533953495359536953795389539954095419542954395449545954695479548954995509551955295539554955595569557955895599560956195629563956495659566956795689569957095719572957395749575957695779578957995809581958295839584958595869587958895899590959195929593959495959596959795989599960096019602960396049605960696079608960996109611961296139614961596169617961896199620962196229623962496259626962796289629963096319632963396349635963696379638963996409641964296439644964596469647964896499650965196529653965496559656965796589659966096619662966396649665966696679668966996709671967296739674967596769677967896799680968196829683968496859686968796889689969096919692969396949695969696979698969997009701970297039704970597069707970897099710971197129713971497159716971797189719972097219722972397249725972697279728972997309731973297339734973597369737973897399740974197429743974497459746974797489749975097519752975397549755975697579758975997609761976297639764976597669767976897699770977197729773977497759776977
79778977997809781978297839784978597869787978897899790979197929793979497959796979797989799980098019802980398049805980698079808980998109811981298139814981598169817981898199820982198229823982498259826982798289829983098319832983398349835983698379838983998409841984298439844984598469847984898499850985198529853985498559856985798589859986098619862986398649865986698679868986998709871987298739874987598769877987898799880988198829883988498859886988798889889989098919892989398949895989698979898989999009901990299039904990599069907990899099910991199129913991499159916991799189919992099219922992399249925992699279928992999309931993299339934993599369937993899399940994199429943994499459946994799489949995099519952995399549955995699579958995999609961996299639964996599669967996899699970997199729973997499759976997799789979998099819982998399849985998699879988998999909991999299939994999599969997999899991000010001100021000310004100051000610007100081000910010100111001210013100141001510016100171001810019100201002110022100231002410025100261002710028100291003010031100321003310034100351003610037100381003910040100411004210043100441004510046100471004810049100501005110052100531005410055100561005710058100591006010061100621006310064100651006610067100681006910070100711007210073100741007510076100771007810079100801008110082100831008410085100861008710088100891009010091100921009310094100951009610097100981009910100101011010210103101041010510106101071010810109101101011110112101131011410115101161011710118101191012010121101221012310124101251012610127101281012910130101311013210133101341013510136101371013810139101401014110142101431014410145101461014710148101491015010151101521015310154101551015610157101581015910160101611016210163101641016510166101671016810169101701017110172101731017410175101761017710178101791018010181101821018310184101851018610187101881018910190101911019210193101941019510196101971019810199102001020110202102031020410205102061020710208102091021010211102121021310214102151021610217102181021910220102211
02221022310224102251022610227102281022910230102311023210233102341023510236102371023810239102401024110242102431024410245102461024710248102491025010251102521025310254102551025610257102581025910260102611026210263102641026510266102671026810269102701027110272102731027410275102761027710278102791028010281102821028310284102851028610287102881028910290102911029210293102941029510296102971029810299103001030110302103031030410305103061030710308103091031010311103121031310314103151031610317103181031910320103211032210323103241032510326103271032810329103301033110332103331033410335103361033710338103391034010341103421034310344103451034610347103481034910350103511035210353103541035510356103571035810359103601036110362103631036410365103661036710368103691037010371103721037310374103751037610377103781037910380103811038210383103841038510386103871038810389103901039110392103931039410395103961039710398103991040010401104021040310404104051040610407104081040910410104111041210413104141041510416104171041810419104201042110422104231042410425104261042710428104291043010431104321043310434104351043610437104381043910440104411044210443104441044510446104471044810449104501045110452104531045410455104561045710458104591046010461104621046310464104651046610467104681046910470104711047210473104741047510476104771047810479104801048110482104831048410485104861048710488104891049010491104921049310494104951049610497104981049910500105011050210503105041050510506105071050810509105101051110512105131051410515105161051710518105191052010521105221052310524105251052610527105281052910530105311053210533105341053510536105371053810539105401054110542105431054410545105461054710548105491055010551105521055310554105551055610557105581055910560105611056210563105641056510566105671056810569105701057110572105731057410575105761057710578105791058010581105821058310584105851058610587105881058910590105911059210593105941059510596105971059810599106001060110602106031060410605106061060710608106091061010611106121061310614106151061610617106181061910620106211
06221062310624106251062610627106281062910630106311063210633106341063510636106371063810639106401064110642106431064410645106461064710648106491065010651106521065310654106551065610657106581065910660106611066210663106641066510666106671066810669106701067110672106731067410675106761067710678106791068010681106821068310684106851068610687106881068910690106911069210693106941069510696106971069810699107001070110702107031070410705107061070710708107091071010711107121071310714107151071610717107181071910720107211072210723107241072510726107271072810729107301073110732107331073410735107361073710738107391074010741107421074310744107451074610747107481074910750107511075210753107541075510756107571075810759107601076110762107631076410765107661076710768107691077010771107721077310774107751077610777107781077910780107811078210783107841078510786107871078810789107901079110792107931079410795107961079710798107991080010801108021080310804108051080610807108081080910810108111081210813108141081510816108171081810819108201082110822108231082410825108261082710828108291083010831108321083310834108351083610837108381083910840108411084210843108441084510846108471084810849108501085110852108531085410855108561085710858108591086010861108621086310864108651086610867108681086910870108711087210873108741087510876108771087810879108801088110882108831088410885108861088710888108891089010891108921089310894108951089610897108981089910900109011090210903109041090510906109071090810909109101091110912109131091410915109161091710918109191092010921109221092310924109251092610927109281092910930109311093210933109341093510936109371093810939109401094110942109431094410945109461094710948109491095010951109521095310954109551095610957109581095910960109611096210963109641096510966109671096810969109701097110972109731097410975109761097710978109791098010981109821098310984109851098610987109881098910990109911099210993109941099510996109971099810999110001100111002110031100411005110061100711008110091101011011110121101311014110151101611017110181101911020110211
10221102311024110251102611027110281102911030110311103211033110341103511036110371103811039110401104111042110431104411045110461104711048110491105011051110521105311054110551105611057110581105911060110611106211063110641106511066110671106811069110701107111072110731107411075110761107711078110791108011081110821108311084110851108611087110881108911090110911109211093110941109511096110971109811099111001110111102111031110411105111061110711108111091111011111111121111311114111151111611117111181111911120111211112211123111241112511126111271112811129111301113111132111331113411135111361113711138111391114011141111421114311144111451114611147111481114911150111511115211153111541115511156111571115811159111601116111162111631116411165111661116711168111691117011171111721117311174111751117611177111781117911180111811118211183111841118511186111871118811189111901119111192111931119411195111961119711198111991120011201112021120311204112051120611207112081120911210112111121211213112141121511216112171121811219112201122111222112231122411225112261122711228112291123011231112321123311234112351123611237112381123911240112411124211243112441124511246112471124811249112501125111252112531125411255112561125711258112591126011261112621126311264112651126611267112681126911270112711127211273112741127511276112771127811279112801128111282112831128411285112861128711288112891129011291112921129311294112951129611297112981129911300113011130211303113041130511306113071130811309113101131111312113131131411315113161131711318113191132011321113221132311324113251132611327113281132911330113311133211333113341133511336113371133811339113401134111342113431134411345113461134711348113491135011351113521135311354113551135611357113581135911360113611136211363113641136511366113671136811369113701137111372113731137411375113761137711378113791138011381113821138311384113851138611387113881138911390113911139211393113941139511396113971139811399114001140111402114031140411405114061140711408114091141011411114121141311414114151141611417114181141911420114211
14221142311424114251142611427114281142911430114311143211433114341143511436114371143811439114401144111442114431144411445114461144711448114491145011451114521145311454114551145611457114581145911460114611146211463114641146511466114671146811469114701147111472114731147411475114761147711478114791148011481114821148311484114851148611487114881148911490114911149211493114941149511496114971149811499115001150111502115031150411505115061150711508115091151011511115121151311514115151151611517115181151911520115211152211523115241152511526115271152811529115301153111532115331153411535115361153711538115391154011541115421154311544115451154611547115481154911550115511155211553115541155511556115571155811559115601156111562115631156411565115661156711568115691157011571115721157311574115751157611577115781157911580115811158211583115841158511586115871158811589115901159111592115931159411595115961159711598115991160011601116021160311604116051160611607116081160911610116111161211613116141161511616116171161811619116201162111622116231162411625116261162711628116291163011631116321163311634116351163611637116381163911640116411164211643116441164511646116471164811649116501165111652116531165411655116561165711658116591166011661116621166311664116651166611667116681166911670116711167211673116741167511676116771167811679116801168111682116831168411685116861168711688116891169011691116921169311694116951169611697116981169911700117011170211703117041170511706117071170811709117101171111712117131171411715117161171711718117191172011721117221172311724117251172611727117281172911730117311173211733117341173511736117371173811739117401174111742117431174411745117461174711748117491175011751117521175311754117551175611757117581175911760117611176211763117641176511766117671176811769117701177111772117731177411775117761177711778117791178011781117821178311784117851178611787117881178911790117911179211793117941179511796117971179811799118001180111802118031180411805118061180711808118091181011811118121181311814118151181611817118181181911820118211
18221182311824118251182611827118281182911830118311183211833118341183511836118371183811839118401184111842118431184411845118461184711848118491185011851118521185311854118551185611857118581185911860118611186211863118641186511866118671186811869118701187111872118731187411875118761187711878118791188011881118821188311884118851188611887118881188911890118911189211893118941189511896118971189811899119001190111902119031190411905119061190711908119091191011911119121191311914119151191611917119181191911920119211192211923119241192511926119271192811929119301193111932119331193411935119361193711938119391194011941119421194311944119451194611947119481194911950119511195211953119541195511956119571195811959119601196111962119631196411965119661196711968119691197011971119721197311974119751197611977119781197911980119811198211983119841198511986119871198811989119901199111992119931199411995119961199711998119991200012001120021200312004120051200612007120081200912010120111201212013120141201512016120171201812019120201202112022120231202412025120261202712028120291203012031120321203312034120351203612037120381203912040120411204212043120441204512046120471204812049120501205112052120531205412055120561205712058120591206012061120621206312064120651206612067120681206912070120711207212073120741207512076120771207812079120801208112082120831208412085120861208712088120891209012091120921209312094120951209612097120981209912100121011210212103121041210512106121071210812109121101211112112121131211412115121161211712118121191212012121121221212312124121251212612127121281212912130121311213212133121341213512136121371213812139121401214112142121431214412145121461214712148121491215012151121521215312154121551215612157121581215912160121611216212163121641216512166121671216812169121701217112172121731217412175121761217712178121791218012181121821218312184121851218612187121881218912190121911219212193121941219512196121971219812199122001220112202122031220412205122061220712208122091221012211122121221312214122151221612217122181221912220122211
22221222312224122251222612227122281222912230122311223212233122341223512236122371223812239122401224112242122431224412245122461224712248122491225012251122521225312254122551225612257122581225912260122611226212263122641226512266122671226812269122701227112272122731227412275122761227712278122791228012281122821228312284122851228612287122881228912290122911229212293122941229512296122971229812299123001230112302123031230412305123061230712308123091231012311123121231312314123151231612317123181231912320123211232212323123241232512326123271232812329123301233112332123331233412335123361233712338123391234012341123421234312344123451234612347123481234912350123511235212353123541235512356123571235812359123601236112362123631236412365123661236712368123691237012371123721237312374123751237612377123781237912380123811238212383123841238512386123871238812389123901239112392123931239412395123961239712398123991240012401124021240312404124051240612407124081240912410124111241212413124141241512416124171241812419124201242112422124231242412425124261242712428124291243012431124321243312434124351243612437124381243912440124411244212443124441244512446124471244812449124501245112452124531245412455124561245712458124591246012461124621246312464124651246612467124681246912470124711247212473124741247512476124771247812479124801248112482124831248412485124861248712488124891249012491124921249312494124951249612497124981249912500125011250212503125041250512506125071250812509125101251112512125131251412515125161251712518125191252012521125221252312524125251252612527125281252912530125311253212533125341253512536125371253812539125401254112542125431254412545125461254712548125491255012551125521255312554125551255612557125581255912560125611256212563125641256512566125671256812569125701257112572125731257412575125761257712578125791258012581125821258312584125851258612587125881258912590125911259212593125941259512596125971259812599126001260112602126031260412605126061260712608126091261012611126121261312614126151261612617126181261912620126211
2622126231262412625126261262712628126291263012631126321263312634126351263612637126381263912640126411264212643126441264512646126471264812649126501265112652126531265412655126561265712658126591266012661126621266312664126651266612667126681266912670126711267212673126741267512676126771267812679126801268112682126831268412685126861268712688126891269012691126921269312694 |
- /*************************************************
- * Perl-Compatible Regular Expressions *
- *************************************************/
- /* PCRE is a library of functions to support regular expressions whose syntax
- and semantics are as close as possible to those of the Perl 5 language.
- Written by Philip Hazel
- Original API code Copyright (c) 1997-2012 University of Cambridge
- New API code Copyright (c) 2016-2017 University of Cambridge
- -----------------------------------------------------------------------------
- Redistribution and use in source and binary forms, with or without
- modification, are permitted provided that the following conditions are met:
- * Redistributions of source code must retain the above copyright notice,
- this list of conditions and the following disclaimer.
- * Redistributions in binary form must reproduce the above copyright
- notice, this list of conditions and the following disclaimer in the
- documentation and/or other materials provided with the distribution.
- * Neither the name of the University of Cambridge nor the names of its
- contributors may be used to endorse or promote products derived from
- this software without specific prior written permission.
- THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
- LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
- INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
- CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- POSSIBILITY OF SUCH DAMAGE.
- -----------------------------------------------------------------------------
- */
- #ifdef HAVE_CONFIG_H
- #include "config.h"
- #endif
- #include "pcre2_internal.h"
- #ifdef SUPPORT_JIT
- /* All-in-one: Since we use the JIT compiler only from here,
- we just include it. This way we don't need to touch the build
- system files. The macros below are defined before the sljit source
- is included: SLJIT_DEBUG mirrors PCRE2_DEBUG, while the SLJIT_MALLOC
- and SLJIT_FREE macros forward to the pcre2_jit_malloc and
- pcre2_jit_free wrappers defined in this file. */
- #define SLJIT_CONFIG_AUTO 1
- #define SLJIT_CONFIG_STATIC 1
- #define SLJIT_VERBOSE 0
- #ifdef PCRE2_DEBUG
- #define SLJIT_DEBUG 1
- #else
- #define SLJIT_DEBUG 0
- #endif
- #define SLJIT_MALLOC(size, allocator_data) pcre2_jit_malloc(size, allocator_data)
- #define SLJIT_FREE(ptr, allocator_data) pcre2_jit_free(ptr, allocator_data)
- /* Allocation hook behind SLJIT_MALLOC: treats allocator_data as a
- pcre2_memctl and forwards the request to its malloc callback,
- passing the control block's memory_data along. Returns whatever
- that callback returns. */
- static void * pcre2_jit_malloc(size_t size, void *allocator_data)
- {
- pcre2_memctl *memctl = (pcre2_memctl *)allocator_data;
- void *block = memctl->malloc(size, memctl->memory_data);
- return block;
- }
- /* Release hook behind SLJIT_FREE: treats allocator_data as a
- pcre2_memctl and hands ptr to its free callback, passing the
- control block's memory_data along. */
- static void pcre2_jit_free(void *ptr, void *allocator_data)
- {
- pcre2_memctl *memctl = (pcre2_memctl *)allocator_data;
- void *user_data = memctl->memory_data;
- memctl->free(ptr, user_data);
- }
- #include "sljit/sljitLir.c"
- #if defined SLJIT_CONFIG_UNSUPPORTED && SLJIT_CONFIG_UNSUPPORTED
- #error Unsupported architecture
- #endif
- /* Defines for debugging purposes. */
- /* 1 - Use unoptimized capturing brackets.
- 2 - Enable capture_last_ptr (includes option 1). */
- /* #define DEBUG_FORCE_UNOPTIMIZED_CBRAS 2 */
- /* 1 - Always have a control head. */
- /* #define DEBUG_FORCE_CONTROL_HEAD 1 */
- /* Size of the regex stack that is allocated on the real machine stack.
- Fast, but limited size. */
- #define MACHINE_STACK_SIZE 32768
- /* Growth rate for the stack allocated by the OS. Should be a multiple
- of the page size. */
- #define STACK_GROWTH_RATE 8192
- /* Enable to check that the allocation could destroy temporaries.
- Only defined when SLJIT_DEBUG is defined and non-zero. */
- #if defined SLJIT_DEBUG && SLJIT_DEBUG
- #define DESTROY_REGISTERS 1
- #endif
- /*
- Short summary of the backtracking mechanism employed by the jit code generator:
- The code generator follows the recursive nature of the PERL compatible regular
- expressions. The basic blocks of regular expressions are condition checkers
- which execute different commands depending on the result of the condition check.
- The relationship between the operators can be horizontal (concatenation) and
- vertical (sub-expression) (See struct backtrack_common for more details).
- 'ab' - 'a' and 'b' regexps are concatenated
- 'a+' - 'a' is the sub-expression of the '+' operator
- The condition checkers are boolean (true/false) checkers. Machine code is generated
- for the checker itself and for the actions depending on the result of the checker.
- The 'true' case is called the matching path (expected path), and the other is called
- the 'backtrack' path. Branch instructions are expensive for all CPUs, so we avoid taken
- branches on the matching path.
- Greedy star operator (*) :
- Matching path: match happens.
- Backtrack path: match failed.
- Non-greedy star operator (*?) :
- Matching path: no need to perform a match.
- Backtrack path: match is required.
- The following example shows the code generated for a capturing bracket
- with two alternatives. Let A, B, C, D be arbitrary regular expressions, and
- suppose we have the following regular expression:
- A(B|C)D
- The generated code will be the following:
- A matching path
- '(' matching path (pushing arguments to the stack)
- B matching path
- ')' matching path (pushing arguments to the stack)
- D matching path
- return with successful match
- D backtrack path
- ')' backtrack path (If we arrived from "C" jump to the backtrack of "C")
- B backtrack path
- C expected path
- jump to D matching path
- C backtrack path
- A backtrack path
- Notice that the order of the backtrack code paths is the opposite of the
- matching (fast) code paths. In this way the topmost value on the stack always
- belongs to the current backtrack code path. The backtrack path must check
- whether there is a next alternative. If so, it needs to jump back to
- the matching path eventually. Otherwise it needs to clear out its own stack
- frame and continue the execution on the backtrack code paths.
- */
- /*
- Saved stack frames:
- Atomic blocks and asserts require reloading the values of private data
- when the backtrack mechanism is performed. Because of OP_RECURSE, the data
- are not necessarily known at compile time, thus we need a dynamic restore
- mechanism.
- The stack frames are stored in a linked list, and have the following format:
- ([ capturing bracket offset ][ start value ][ end value ])+ ... [ 0 ] [ previous head ]
- Thus we can restore the private data to a particular point in the stack.
- */
- typedef struct jit_arguments { /* Argument block passed by pointer to a jit_function. */
- /* Pointers first. All pointer-typed members are grouped here, ahead
- of the integer members; keep this ordering when adding members. */
- struct sljit_stack *stack;
- PCRE2_SPTR str;
- PCRE2_SPTR begin;
- PCRE2_SPTR end;
- pcre2_match_data *match_data;
- PCRE2_SPTR startchar_ptr;
- PCRE2_UCHAR *mark_ptr;
- int (*callout)(pcre2_callout_block *, void *); /* Callout function pointer. */
- void *callout_data; /* NOTE(review): presumably handed to callout as its second argument - confirm. */
- /* Everything else after. These are the non-pointer (integer) members. */
- sljit_uw offset_limit;
- sljit_u32 limit_match;
- sljit_u32 oveccount;
- sljit_u32 options;
- } jit_arguments;
- #define JIT_NUMBER_OF_COMPILE_MODES 3 /* Length of the per-mode arrays in executable_functions. */
- typedef struct executable_functions { /* Three parallel arrays with one slot per compile mode, plus two scalars. */
- void *executable_funcs[JIT_NUMBER_OF_COMPILE_MODES]; /* One function pointer slot per mode. */
- void *read_only_data_heads[JIT_NUMBER_OF_COMPILE_MODES]; /* One read-only data head per mode. */
- sljit_uw executable_sizes[JIT_NUMBER_OF_COMPILE_MODES]; /* One size value per mode. */
- sljit_u32 top_bracket;
- sljit_u32 limit_match;
- } executable_functions;
- typedef struct jump_list { /* Node of a singly linked list of sljit jumps. */
- struct sljit_jump *jump; /* The jump recorded by this node. */
- struct jump_list *next; /* Following node in the list. */
- } jump_list;
- typedef struct stub_list { /* Node of a singly linked list pairing a jump with a label. */
- struct sljit_jump *start; /* NOTE(review): presumably the jump that enters the stub - confirm. */
- struct sljit_label *quit; /* NOTE(review): presumably the label where the stub resumes - confirm. */
- struct stub_list *next; /* Following node in the list. */
- } stub_list;
- typedef struct label_addr_list { /* Node of a singly linked list pairing a label with an address slot. */
- struct sljit_label *label;
- sljit_uw *update_addr; /* NOTE(review): presumably the location to be patched with the label's address - confirm. */
- struct label_addr_list *next; /* Following node in the list. */
- } label_addr_list;
- enum frame_types { /* Negative sentinel values; NOTE(review): presumably used in place of a real frame size - confirm. */
- no_frame = -1,
- no_stack = -2
- };
- enum control_types { /* NOTE(review): presumably tags for entries on the control verb chain - confirm. */
- type_mark = 0,
- type_then_trap = 1
- };
- typedef int (SLJIT_FUNC *jit_function)(jit_arguments *args); /* Pointer to a function that takes a jit_arguments block and returns int. */
- /* The following structure is the key data type for the recursive
- code generator. It is allocated by compile_matchingpath, and contains
- the arguments for compile_backtrackingpath. Must be the first member
- of its descendants: each of the *_backtrack structures in this file
- starts with a backtrack_common member named "common". */
- typedef struct backtrack_common {
- /* Concatenation stack. */
- struct backtrack_common *prev;
- jump_list *nextbacktracks;
- /* Internal stack (for component operators). */
- struct backtrack_common *top;
- jump_list *topbacktracks;
- /* Opcode pointer. */
- PCRE2_SPTR cc;
- } backtrack_common;
- typedef struct assert_backtrack { /* Backtrack record extending backtrack_common; per its name, used for assertions. */
- backtrack_common common; /* Must stay the first member (see backtrack_common). */
- jump_list *condfailed;
- /* Less than 0 if a frame is not needed. */
- int framesize;
- /* Points to our private memory word on the stack. */
- int private_data_ptr;
- /* For iterators. */
- struct sljit_label *matchingpath;
- } assert_backtrack;
- typedef struct bracket_backtrack { /* Backtrack record extending backtrack_common; per its name, used for brackets. */
- backtrack_common common; /* Must stay the first member (see backtrack_common). */
- /* Where to continue if an alternative is successfully matched. */
- struct sljit_label *alternative_matchingpath;
- /* For rmin and rmax iterators. */
- struct sljit_label *recursive_matchingpath;
- /* For greedy ? operator. */
- struct sljit_label *zero_matchingpath;
- /* Contains the branches of a failed condition. The members of this
- union share storage, so only one of them is meaningful at a time. */
- union {
- /* Both for OP_COND, OP_SCOND. */
- jump_list *condfailed;
- assert_backtrack *assert;
- /* For OP_ONCE. Less than 0 if not needed. */
- int framesize;
- } u;
- /* Points to our private memory word on the stack. */
- int private_data_ptr;
- } bracket_backtrack;
- typedef struct bracketpos_backtrack { /* Backtrack record extending backtrack_common. */
- backtrack_common common; /* Must stay the first member (see backtrack_common). */
- /* Points to our private memory word on the stack. */
- int private_data_ptr;
- /* Reverting stack is needed. */
- int framesize;
- /* Allocated stack size. */
- int stacksize;
- } bracketpos_backtrack;
- typedef struct braminzero_backtrack { /* Backtrack record extending backtrack_common with one label. */
- backtrack_common common; /* Must stay the first member (see backtrack_common). */
- struct sljit_label *matchingpath; /* Label on the matching path. */
- } braminzero_backtrack;
- typedef struct char_iterator_backtrack { /* Backtrack record extending backtrack_common; per its name, used for character iterators. */
- backtrack_common common; /* Must stay the first member (see backtrack_common). */
- /* Next iteration. */
- struct sljit_label *matchingpath;
- union { /* Either a jump list or the charpos data; the two share storage. */
- jump_list *backtracks;
- struct {
- unsigned int othercasebit;
- PCRE2_UCHAR chr;
- BOOL enabled; /* NOTE(review): presumably tells whether the charpos data is in use - confirm. */
- } charpos;
- } u;
- } char_iterator_backtrack;
- typedef struct ref_iterator_backtrack { /* Backtrack record extending backtrack_common with one label. */
- backtrack_common common; /* Must stay the first member (see backtrack_common). */
- /* Next iteration. */
- struct sljit_label *matchingpath;
- } ref_iterator_backtrack;
- typedef struct recurse_entry { /* Node of a singly linked list of recursion entries. */
- struct recurse_entry *next;
- /* Contains the function entry label. */
- struct sljit_label *entry_label;
- /* NOTE(review): presumably the label of the function's backtrack
- entry (named by analogy with backtrack_calls below) - confirm. */
- struct sljit_label *backtrack_label;
- /* Collects the entry calls for as long as the function has not been created. */
- jump_list *entry_calls;
- /* Collects the backtrack calls for as long as the function has not been created. */
- jump_list *backtrack_calls;
- /* Points to the starting opcode. */
- sljit_sw start;
- } recurse_entry;
- typedef struct recurse_backtrack { /* Backtrack record extending backtrack_common; per its name, used for recursion. */
- backtrack_common common; /* Must stay the first member (see backtrack_common). */
- /* Return to the matching path. */
- struct sljit_label *matchingpath;
- /* Recursive pattern (a recurse_entry node). */
- recurse_entry *entry;
- /* Pattern is inlined. */
- BOOL inlined_pattern;
- } recurse_backtrack;
- #define OP_THEN_TRAP OP_TABLE_LENGTH /* Alias of OP_TABLE_LENGTH; NOTE(review): presumably a pseudo-opcode outside the real opcode range - confirm. */
- typedef struct then_trap_backtrack { /* Backtrack record extending backtrack_common. */
- backtrack_common common; /* Must stay the first member (see backtrack_common). */
- /* If then_trap is not NULL, this structure contains the real
- then_trap for the backtracking path. */
- struct then_trap_backtrack *then_trap;
- /* Points to the starting opcode. */
- sljit_sw start;
- /* Exit point for the then opcodes of this alternative. */
- jump_list *quit;
- /* Frame size of the current alternative. */
- int framesize;
- } then_trap_backtrack;
- #define MAX_N_CHARS 12
- #define MAX_DIFF_CHARS 5 /* Capacity of the chars array in fast_forward_char_data. */
- typedef struct fast_forward_char_data {
- /* Number of characters in the chars array, 255 for any character. */
- sljit_u8 count;
- /* Number of last UTF-8 characters in the chars array. */
- sljit_u8 last_count;
- /* Available characters in the current position (at most MAX_DIFF_CHARS). */
- PCRE2_UCHAR chars[MAX_DIFF_CHARS];
- } fast_forward_char_data;
- #define MAX_CLASS_RANGE_SIZE 4
- #define MAX_CLASS_CHARS_SIZE 3
- typedef struct compiler_common { /* Compilation state shared by the routines of this file; they receive it as the 'common' argument. */
- /* The sljit generic compiler. */
- struct sljit_compiler *compiler;
- /* Compiled regular expression. */
- pcre2_real_code *re;
- /* First byte code. */
- PCRE2_SPTR start;
- /* Maps private data offset to each opcode.
- Indexed by (opcode address - start), see PRIVATE_DATA(). */
- sljit_s32 *private_data_ptrs;
- /* Chain list of read-only data ptrs. */
- void *read_only_data_head;
- /* Tells whether the capturing bracket is optimized. */
- sljit_u8 *optimized_cbracket;
- /* Tells whether the starting offset is a target of then. */
- sljit_u8 *then_offsets;
- /* Current position where a THEN must jump. */
- then_trap_backtrack *then_trap;
- /* Starting offset of private data for capturing brackets. */
- sljit_s32 cbra_ptr;
- /* Output vector starting point. Must be divisible by 2. */
- sljit_s32 ovector_start;
- /* Points to the starting character of the current match. */
- sljit_s32 start_ptr;
- /* Last known position of the requested byte. */
- sljit_s32 req_char_ptr;
- /* Head of the last recursion. */
- sljit_s32 recursive_head_ptr;
- /* First inspected character for partial matching.
- (Needed for avoiding zero length partial matches.) */
- sljit_s32 start_used_ptr;
- /* Starting pointer for partial soft matches. */
- sljit_s32 hit_start;
- /* Pointer of the match end position. */
- sljit_s32 match_end_ptr;
- /* Points to the marked string. */
- sljit_s32 mark_ptr;
- /* Recursive control verb management chain. */
- sljit_s32 control_head_ptr;
- /* Points to the last matched capture block index. */
- sljit_s32 capture_last_ptr;
- /* Fast forward skipping byte code pointer. */
- PCRE2_SPTR fast_forward_bc_ptr;
- /* Locals used by fast fail optimization. */
- sljit_s32 fast_fail_start_ptr;
- sljit_s32 fast_fail_end_ptr;
- /* Flipped and lower case tables. */
- const sljit_u8 *fcc;
- sljit_sw lcc;
- /* Mode can be PCRE2_JIT_COMPLETE and others. */
- int mode;
- /* TRUE, when minlength is greater than 0.
- NOTE(review): the field name suggests the opposite sense (the pattern
- may match an empty string), and check_opcode_types sets it to TRUE
- when OP_SET_SOM is found - confirm the intended meaning. */
- BOOL might_be_empty;
- /* \K is found in the pattern. */
- BOOL has_set_som;
- /* (*SKIP:arg) is found in the pattern. */
- BOOL has_skip_arg;
- /* (*THEN) is found in the pattern. */
- BOOL has_then;
- /* (*SKIP) or (*SKIP:arg) is found in lookbehind assertion. */
- BOOL has_skip_in_assert_back;
- /* Quit is redirected by recurse, negative assertion, or positive assertion in conditional block. */
- BOOL local_quit_available;
- /* Currently in a positive assertion. */
- BOOL in_positive_assertion;
- /* Newline control. */
- int nltype;
- sljit_u32 nlmax;
- sljit_u32 nlmin;
- int newline;
- int bsr_nltype;
- sljit_u32 bsr_nlmax;
- sljit_u32 bsr_nlmin;
- /* Dollar endonly. */
- int endonly;
- /* Tables. */
- sljit_sw ctypes;
- /* Named capturing brackets. */
- PCRE2_SPTR name_table;
- sljit_sw name_count;
- sljit_sw name_entry_size;
- /* Labels and jump lists. */
- struct sljit_label *partialmatchlabel;
- struct sljit_label *quit_label;
- struct sljit_label *abort_label;
- struct sljit_label *accept_label;
- struct sljit_label *ff_newline_shortcut;
- stub_list *stubs;
- label_addr_list *label_addrs;
- recurse_entry *entries;
- recurse_entry *currententry;
- jump_list *partialmatch;
- jump_list *quit;
- jump_list *positive_assertion_quit;
- jump_list *abort;
- jump_list *failed_match;
- jump_list *accept;
- jump_list *calllimit;
- jump_list *stackalloc;
- jump_list *revertframes;
- jump_list *wordboundary;
- jump_list *anynewline;
- jump_list *hspace;
- jump_list *vspace;
- jump_list *casefulcmp;
- jump_list *caselesscmp;
- jump_list *reset_match;
- BOOL unset_backref;
- BOOL alt_circumflex;
- #ifdef SUPPORT_UNICODE
- BOOL utf;
- BOOL use_ucp;
- jump_list *getucd;
- #if PCRE2_CODE_UNIT_WIDTH == 8
- jump_list *utfreadchar;
- jump_list *utfreadchar16;
- jump_list *utfreadtype8;
- #endif
- #endif /* SUPPORT_UNICODE */
- } compiler_common;
- /* For byte_sequence_compare.
- The ucharptr member and the two unions exist only when SLJIT_UNALIGNED
- is defined to a non-zero value. Each union overlays integer views on an
- array of code units that is 4 bytes long for every code unit width.
- (c / oc presumably hold the characters and their other-case variants -
- confirm at the use site.) */
- typedef struct compare_context {
- int length;
- int sourcereg;
- #if defined SLJIT_UNALIGNED && SLJIT_UNALIGNED
- int ucharptr;
- union {
- sljit_s32 asint;
- sljit_u16 asushort;
- #if PCRE2_CODE_UNIT_WIDTH == 8
- sljit_u8 asbyte;
- sljit_u8 asuchars[4];
- #elif PCRE2_CODE_UNIT_WIDTH == 16
- sljit_u16 asuchars[2];
- #elif PCRE2_CODE_UNIT_WIDTH == 32
- sljit_u32 asuchars[1];
- #endif
- } c;
- union {
- sljit_s32 asint;
- sljit_u16 asushort;
- #if PCRE2_CODE_UNIT_WIDTH == 8
- sljit_u8 asbyte;
- sljit_u8 asuchars[4];
- #elif PCRE2_CODE_UNIT_WIDTH == 16
- sljit_u16 asuchars[2];
- #elif PCRE2_CODE_UNIT_WIDTH == 32
- sljit_u32 asuchars[1];
- #endif
- } oc;
- #endif
- } compare_context;
- /* Undefine sljit macros.
- CMP is redefined further below as a shortcut for sljit_emit_cmp. */
- #undef CMP
- /* Used for accessing the elements of the stack.
- Converts a slot index into a byte offset (sljit_sw sized slots). */
- #define STACK(i) ((i) * (int)sizeof(sljit_sw))
- #ifdef SLJIT_PREF_SHIFT_REG
- #if SLJIT_PREF_SHIFT_REG == SLJIT_R2
- /* Nothing. */
- #elif SLJIT_PREF_SHIFT_REG == SLJIT_R3
- #define SHIFT_REG_IS_R3
- #else
- #error "Unsupported shift register"
- #endif
- #endif
- #define TMP1 SLJIT_R0
- #ifdef SHIFT_REG_IS_R3 /* TMP2 and TMP3 are swapped so that TMP2 is the preferred shift register whenever one is defined. */
- #define TMP2 SLJIT_R3
- #define TMP3 SLJIT_R2
- #else
- #define TMP2 SLJIT_R2
- #define TMP3 SLJIT_R3
- #endif
- #define STR_PTR SLJIT_R1
- #define STR_END SLJIT_S0
- #define STACK_TOP SLJIT_S1
- #define STACK_LIMIT SLJIT_S2
- #define COUNT_MATCH SLJIT_S3
- #define ARGUMENTS SLJIT_S4
- #define RETURN_ADDR SLJIT_R4
- /* Local space layout.
- The values below are byte offsets of sljit_sw sized local slots. */
- /* These two locals can be used by the current opcode. */
- #define LOCALS0 (0 * sizeof(sljit_sw))
- #define LOCALS1 (1 * sizeof(sljit_sw))
- /* Two local variables for possessive quantifiers (char1 cannot use them). */
- #define POSSESSIVE0 (2 * sizeof(sljit_sw))
- #define POSSESSIVE1 (3 * sizeof(sljit_sw))
- /* Max limit of recursions. */
- #define LIMIT_MATCH (4 * sizeof(sljit_sw))
- /* The output vector is stored on the stack, and contains pointers
- to characters. The vector data is divided into two groups: the first
- group contains the start / end character pointers, and the second is
- the start pointers when the end of the capturing group has not yet reached.
- All of these macros expect a variable named 'common' in scope. */
- #define OVECTOR_START (common->ovector_start)
- #define OVECTOR(i) (OVECTOR_START + (i) * (sljit_sw)sizeof(sljit_sw))
- #define OVECTOR_PRIV(i) (common->cbra_ptr + (i) * (sljit_sw)sizeof(sljit_sw))
- #define PRIVATE_DATA(cc) (common->private_data_ptrs[(cc) - common->start])
- #if PCRE2_CODE_UNIT_WIDTH == 8 /* IN_UCHARS converts a code unit count to bytes; UCHAR_SHIFT is not defined for 8 bit code units. */
- #define MOV_UCHAR SLJIT_MOV_U8
- #define IN_UCHARS(x) (x)
- #elif PCRE2_CODE_UNIT_WIDTH == 16
- #define MOV_UCHAR SLJIT_MOV_U16
- #define UCHAR_SHIFT (1)
- #define IN_UCHARS(x) ((x) * 2)
- #elif PCRE2_CODE_UNIT_WIDTH == 32
- #define MOV_UCHAR SLJIT_MOV_U32
- #define UCHAR_SHIFT (2)
- #define IN_UCHARS(x) ((x) * 4)
- #else
- #error Unsupported compiling mode
- #endif
- /* Shortcuts.
- Every shortcut except DEFINE_COMPILER expects a local variable named
- 'compiler'; DEFINE_COMPILER declares it from common->compiler. */
- #define DEFINE_COMPILER \
- struct sljit_compiler *compiler = common->compiler
- #define OP1(op, dst, dstw, src, srcw) \
- sljit_emit_op1(compiler, (op), (dst), (dstw), (src), (srcw))
- #define OP2(op, dst, dstw, src1, src1w, src2, src2w) \
- sljit_emit_op2(compiler, (op), (dst), (dstw), (src1), (src1w), (src2), (src2w))
- #define LABEL() \
- sljit_emit_label(compiler)
- #define JUMP(type) \
- sljit_emit_jump(compiler, (type))
- #define JUMPTO(type, label) \
- sljit_set_label(sljit_emit_jump(compiler, (type)), (label))
- #define JUMPHERE(jump) \
- sljit_set_label((jump), sljit_emit_label(compiler))
- #define SET_LABEL(jump, label) \
- sljit_set_label((jump), (label))
- #define CMP(type, src1, src1w, src2, src2w) \
- sljit_emit_cmp(compiler, (type), (src1), (src1w), (src2), (src2w))
- #define CMPTO(type, src1, src1w, src2, src2w, label) \
- sljit_set_label(sljit_emit_cmp(compiler, (type), (src1), (src1w), (src2), (src2w)), (label))
- #define OP_FLAGS(op, dst, dstw, type) \
- sljit_emit_op_flags(compiler, (op), (dst), (dstw), (type))
- #define CMOV(type, dst_reg, src, srcw) \
- sljit_emit_cmov(compiler, (type), (dst_reg), (src), (srcw))
- #define GET_LOCAL_BASE(dst, dstw, offset) \
- sljit_get_local_base(compiler, (dst), (dstw), (offset))
- #define READ_CHAR_MAX 0x7fffffff
- #define INVALID_UTF_CHAR 888
- /* Returns the address of the first opcode after a bracket group.
-    cc must point to the opening opcode of an assertion or of a
-    bracket in the OP_ONCE .. OP_SCOND range. */
- static PCRE2_SPTR bracketend(PCRE2_SPTR cc)
- {
-   SLJIT_ASSERT((*cc >= OP_ASSERT && *cc <= OP_ASSERTBACK_NOT) || (*cc >= OP_ONCE && *cc <= OP_SCOND));
-   /* Follow the link of the opening opcode, then of every OP_ALT. */
-   for (;;)
-     {
-     cc += GET(cc, 1);
-     if (*cc != OP_ALT)
-       break;
-     }
-   SLJIT_ASSERT(*cc >= OP_KET && *cc <= OP_KETRPOS);
-   /* Step over the closing opcode and its link field. */
-   return cc + 1 + LINK_SIZE;
- }
- /* Returns the number of alternatives of the bracket group starting
-    at cc (a group without any OP_ALT has one alternative). */
- static int no_alternatives(PCRE2_SPTR cc)
- {
-   int alternatives = 1;
-   SLJIT_ASSERT((*cc >= OP_ASSERT && *cc <= OP_ASSERTBACK_NOT) || (*cc >= OP_ONCE && *cc <= OP_SCOND));
-   /* The first link leads to the end of the first alternative. */
-   cc += GET(cc, 1);
-   while (*cc == OP_ALT)
-     {
-     cc += GET(cc, 1);
-     alternatives++;
-     }
-   SLJIT_ASSERT(*cc >= OP_KET && *cc <= OP_KETRPOS);
-   return alternatives;
- }
- /* Functions which might need modification for every newly supported opcode:
- next_opcode
- check_opcode_types
- set_private_data_ptrs
- get_framesize
- init_frame
- get_recurse_data_length
- copy_recurse_data
- compile_matchingpath
- compile_backtrackingpath
- */
- static PCRE2_SPTR next_opcode(compiler_common *common, PCRE2_SPTR cc) /* Returns the address of the opcode that follows *cc, or NULL for OP_ANYBYTE in UTF mode and for opcodes not listed here. */
- {
- SLJIT_UNUSED_ARG(common);
- switch(*cc)
- {
- case OP_SOD:
- case OP_SOM:
- case OP_SET_SOM:
- case OP_NOT_WORD_BOUNDARY:
- case OP_WORD_BOUNDARY:
- case OP_NOT_DIGIT:
- case OP_DIGIT:
- case OP_NOT_WHITESPACE:
- case OP_WHITESPACE:
- case OP_NOT_WORDCHAR:
- case OP_WORDCHAR:
- case OP_ANY:
- case OP_ALLANY:
- case OP_NOTPROP:
- case OP_PROP:
- case OP_ANYNL:
- case OP_NOT_HSPACE:
- case OP_HSPACE:
- case OP_NOT_VSPACE:
- case OP_VSPACE:
- case OP_EXTUNI:
- case OP_EODN:
- case OP_EOD:
- case OP_CIRC:
- case OP_CIRCM:
- case OP_DOLL:
- case OP_DOLLM:
- case OP_CRSTAR:
- case OP_CRMINSTAR:
- case OP_CRPLUS:
- case OP_CRMINPLUS:
- case OP_CRQUERY:
- case OP_CRMINQUERY:
- case OP_CRRANGE:
- case OP_CRMINRANGE:
- case OP_CRPOSSTAR:
- case OP_CRPOSPLUS:
- case OP_CRPOSQUERY:
- case OP_CRPOSRANGE:
- case OP_CLASS:
- case OP_NCLASS:
- case OP_REF:
- case OP_REFI:
- case OP_DNREF:
- case OP_DNREFI:
- case OP_RECURSE:
- case OP_CALLOUT:
- case OP_ALT:
- case OP_KET:
- case OP_KETRMAX:
- case OP_KETRMIN:
- case OP_KETRPOS:
- case OP_REVERSE:
- case OP_ASSERT:
- case OP_ASSERT_NOT:
- case OP_ASSERTBACK:
- case OP_ASSERTBACK_NOT:
- case OP_ONCE:
- case OP_BRA:
- case OP_BRAPOS:
- case OP_CBRA:
- case OP_CBRAPOS:
- case OP_COND:
- case OP_SBRA:
- case OP_SBRAPOS:
- case OP_SCBRA:
- case OP_SCBRAPOS:
- case OP_SCOND:
- case OP_CREF:
- case OP_DNCREF:
- case OP_RREF:
- case OP_DNRREF:
- case OP_FALSE:
- case OP_TRUE:
- case OP_BRAZERO:
- case OP_BRAMINZERO:
- case OP_BRAPOSZERO:
- case OP_PRUNE:
- case OP_SKIP:
- case OP_THEN:
- case OP_COMMIT:
- case OP_FAIL:
- case OP_ACCEPT:
- case OP_ASSERT_ACCEPT:
- case OP_CLOSE:
- case OP_SKIPZERO:
- return cc + PRIV(OP_lengths)[*cc]; /* Length comes straight from the opcode length table. */
- case OP_CHAR:
- case OP_CHARI:
- case OP_NOT:
- case OP_NOTI:
- case OP_STAR:
- case OP_MINSTAR:
- case OP_PLUS:
- case OP_MINPLUS:
- case OP_QUERY:
- case OP_MINQUERY:
- case OP_UPTO:
- case OP_MINUPTO:
- case OP_EXACT:
- case OP_POSSTAR:
- case OP_POSPLUS:
- case OP_POSQUERY:
- case OP_POSUPTO:
- case OP_STARI:
- case OP_MINSTARI:
- case OP_PLUSI:
- case OP_MINPLUSI:
- case OP_QUERYI:
- case OP_MINQUERYI:
- case OP_UPTOI:
- case OP_MINUPTOI:
- case OP_EXACTI:
- case OP_POSSTARI:
- case OP_POSPLUSI:
- case OP_POSQUERYI:
- case OP_POSUPTOI:
- case OP_NOTSTAR:
- case OP_NOTMINSTAR:
- case OP_NOTPLUS:
- case OP_NOTMINPLUS:
- case OP_NOTQUERY:
- case OP_NOTMINQUERY:
- case OP_NOTUPTO:
- case OP_NOTMINUPTO:
- case OP_NOTEXACT:
- case OP_NOTPOSSTAR:
- case OP_NOTPOSPLUS:
- case OP_NOTPOSQUERY:
- case OP_NOTPOSUPTO:
- case OP_NOTSTARI:
- case OP_NOTMINSTARI:
- case OP_NOTPLUSI:
- case OP_NOTMINPLUSI:
- case OP_NOTQUERYI:
- case OP_NOTMINQUERYI:
- case OP_NOTUPTOI:
- case OP_NOTMINUPTOI:
- case OP_NOTEXACTI:
- case OP_NOTPOSSTARI:
- case OP_NOTPOSPLUSI:
- case OP_NOTPOSQUERYI:
- case OP_NOTPOSUPTOI:
- cc += PRIV(OP_lengths)[*cc]; /* The table length ends on the first code unit of the character ... */
- #ifdef SUPPORT_UNICODE
- if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]); /* ... so in UTF mode its extra code units are skipped as well. */
- #endif
- return cc;
- /* Special cases.
- The OP_TYPE* repeats stop one code unit before the table length, so the
- returned address is the last unit of the item (presumably the repeated
- character type, which the caller then visits as an opcode of its own). */
- case OP_TYPESTAR:
- case OP_TYPEMINSTAR:
- case OP_TYPEPLUS:
- case OP_TYPEMINPLUS:
- case OP_TYPEQUERY:
- case OP_TYPEMINQUERY:
- case OP_TYPEUPTO:
- case OP_TYPEMINUPTO:
- case OP_TYPEEXACT:
- case OP_TYPEPOSSTAR:
- case OP_TYPEPOSPLUS:
- case OP_TYPEPOSQUERY:
- case OP_TYPEPOSUPTO:
- return cc + PRIV(OP_lengths)[*cc] - 1;
- case OP_ANYBYTE:
- #ifdef SUPPORT_UNICODE
- if (common->utf) return NULL; /* check_opcode_types treats a NULL result as an unsupported pattern. */
- #endif
- return cc + 1;
- case OP_CALLOUT_STR:
- return cc + GET(cc, 1 + 2*LINK_SIZE); /* Variable length: the total length is stored inside the item. */
- #if defined SUPPORT_UNICODE || PCRE2_CODE_UNIT_WIDTH != 8
- case OP_XCLASS:
- return cc + GET(cc, 1); /* Variable length: the total length follows the opcode. */
- #endif
- case OP_MARK:
- case OP_PRUNE_ARG:
- case OP_SKIP_ARG:
- case OP_THEN_ARG:
- return cc + 1 + 2 + cc[1]; /* cc[1] is added to a fixed part of three code units (cc[1] is presumably the name length - confirm against the opcode layout). */
- default:
- /* All opcodes are supported now! */
- SLJIT_UNREACHABLE();
- return NULL;
- }
- }
- /* Walks the byte code between cc and ccend once and collects pattern
-    wide facts in 'common': which capturing brackets must not be
-    optimized, which control verbs occur, and which extra local slots
-    (recursion head, last capture, mark) have to be reserved in front of
-    the output vector. Returns FALSE when an unsupported construct is
-    found, TRUE otherwise. */
- static BOOL check_opcode_types(compiler_common *common, PCRE2_SPTR cc, PCRE2_SPTR ccend)
- {
-   int remaining;
-   PCRE2_SPTR entry;
-   /* End of the furthest reaching OP_ASSERTBACK seen so far. It starts
-      before the first opcode, so nothing is inside it initially. */
-   PCRE2_SPTR lookbehind_end = cc - 1;
-   while (cc < ccend)
-     {
-     switch(*cc)
-       {
-       case OP_SET_SOM:
-         common->has_set_som = TRUE;
-         common->might_be_empty = TRUE;
-         cc++;
-         break;
-       /* References by number: the referenced bracket is not optimized. */
-       case OP_REF:
-       case OP_REFI:
-       case OP_CREF:
-         common->optimized_cbracket[GET2(cc, 1)] = 0;
-         cc += 1 + IMM2_SIZE;
-         break;
-       case OP_CBRAPOS:
-       case OP_SCBRAPOS:
-         common->optimized_cbracket[GET2(cc, 1 + LINK_SIZE)] = 0;
-         cc += 1 + LINK_SIZE + IMM2_SIZE;
-         break;
-       case OP_COND:
-       case OP_SCOND:
-         /* Only AUTO_CALLOUT can insert this opcode. We do
-            not intend to support this case. */
-         if (cc[1 + LINK_SIZE] == OP_CALLOUT || cc[1 + LINK_SIZE] == OP_CALLOUT_STR)
-           return FALSE;
-         cc += 1 + LINK_SIZE;
-         break;
-       /* References by name: visit every name table entry of the group. */
-       case OP_DNREF:
-       case OP_DNREFI:
-       case OP_DNCREF:
-         entry = common->name_table + GET2(cc, 1) * common->name_entry_size;
-         for (remaining = GET2(cc, 1 + IMM2_SIZE); remaining > 0; remaining--)
-           {
-           common->optimized_cbracket[GET2(entry, 0)] = 0;
-           entry += common->name_entry_size;
-           }
-         cc += 1 + 2 * IMM2_SIZE;
-         break;
-       case OP_RECURSE:
-         /* The slot is reserved on the first occurrence only. */
-         if (common->recursive_head_ptr == 0)
-           {
-           common->recursive_head_ptr = common->ovector_start;
-           common->ovector_start += sizeof(sljit_sw);
-           }
-         cc += 1 + LINK_SIZE;
-         break;
-       case OP_CALLOUT:
-       case OP_CALLOUT_STR:
-         if (common->capture_last_ptr == 0)
-           {
-           common->capture_last_ptr = common->ovector_start;
-           common->ovector_start += sizeof(sljit_sw);
-           }
-         if (*cc == OP_CALLOUT)
-           cc += PRIV(OP_lengths)[OP_CALLOUT];
-         else
-           cc += GET(cc, 1 + 2*LINK_SIZE);
-         break;
-       case OP_ASSERTBACK:
-         entry = bracketend(cc);
-         if (entry > lookbehind_end)
-           lookbehind_end = entry;
-         cc += 1 + LINK_SIZE;
-         break;
-       case OP_THEN_ARG:
-         common->has_then = TRUE;
-         common->control_head_ptr = 1;
-         /* Fall through. */
-       case OP_PRUNE_ARG:
-       case OP_MARK:
-         if (common->mark_ptr == 0)
-           {
-           common->mark_ptr = common->ovector_start;
-           common->ovector_start += sizeof(sljit_sw);
-           }
-         cc += 1 + 2 + cc[1];
-         break;
-       case OP_THEN:
-         common->has_then = TRUE;
-         common->control_head_ptr = 1;
-         cc++;
-         break;
-       case OP_SKIP:
-         if (cc < lookbehind_end)
-           common->has_skip_in_assert_back = TRUE;
-         cc++;
-         break;
-       case OP_SKIP_ARG:
-         common->control_head_ptr = 1;
-         common->has_skip_arg = TRUE;
-         if (cc < lookbehind_end)
-           common->has_skip_in_assert_back = TRUE;
-         cc += 1 + 2 + cc[1];
-         break;
-       default:
-         /* next_opcode returns NULL for opcodes it cannot step over. */
-         cc = next_opcode(common, cc);
-         if (cc == NULL)
-           return FALSE;
-         break;
-       }
-     }
-   return TRUE;
- }
- /* Tells whether the item at cc is a star or plus repeat (greedy, lazy
-    or possessive) of a single character, a character type or a
-    character class. Type repeats of OP_ANYNL and OP_EXTUNI are excluded. */
- static BOOL is_accelerated_repeat(PCRE2_SPTR cc)
- {
-   switch(*cc)
-     {
-     case OP_TYPESTAR:
-     case OP_TYPEMINSTAR:
-     case OP_TYPEPLUS:
-     case OP_TYPEMINPLUS:
-     case OP_TYPEPOSSTAR:
-     case OP_TYPEPOSPLUS:
-       if (cc[1] == OP_ANYNL || cc[1] == OP_EXTUNI)
-         return FALSE;
-       return TRUE;
-     case OP_STAR:
-     case OP_MINSTAR:
-     case OP_PLUS:
-     case OP_MINPLUS:
-     case OP_POSSTAR:
-     case OP_POSPLUS:
-     case OP_STARI:
-     case OP_MINSTARI:
-     case OP_PLUSI:
-     case OP_MINPLUSI:
-     case OP_POSSTARI:
-     case OP_POSPLUSI:
-     case OP_NOTSTAR:
-     case OP_NOTMINSTAR:
-     case OP_NOTPLUS:
-     case OP_NOTMINPLUS:
-     case OP_NOTPOSSTAR:
-     case OP_NOTPOSPLUS:
-     case OP_NOTSTARI:
-     case OP_NOTMINSTARI:
-     case OP_NOTPLUSI:
-     case OP_NOTMINPLUSI:
-     case OP_NOTPOSSTARI:
-     case OP_NOTPOSPLUSI:
-       return TRUE;
-     case OP_CLASS:
-     case OP_NCLASS:
- #if defined SUPPORT_UNICODE || PCRE2_CODE_UNIT_WIDTH != 8
-     case OP_XCLASS:
-       if (*cc == OP_XCLASS)
-         cc += GET(cc, 1);
-       else
-         cc += (int)(1 + (32 / sizeof(PCRE2_UCHAR)));
- #else
-       cc += (1 + (32 / sizeof(PCRE2_UCHAR)));
- #endif
-       /* cc now points to the opcode that follows the class data. */
-       if (*cc == OP_CRSTAR || *cc == OP_CRMINSTAR || *cc == OP_CRPLUS
-           || *cc == OP_CRMINPLUS || *cc == OP_CRPOSSTAR || *cc == OP_CRPOSPLUS)
-         return TRUE;
-       return FALSE;
-     default:
-       return FALSE;
-     }
- }
- /* Checks whether the pattern starts (after zero width assertions and
-    simple non-repeated brackets) with an accelerated repeat. If so, the
-    repeat is stored in common->fast_forward_bc_ptr, one private data slot
-    is taken from *private_data_start for it, and TRUE is returned. */
- static SLJIT_INLINE BOOL detect_fast_forward_skip(compiler_common *common, int *private_data_start)
- {
-   PCRE2_SPTR cc = common->start;
-   PCRE2_SPTR closing;
-   int header;
-   /* Skip not repeated brackets. */
-   for (;;)
-     {
-     switch(*cc)
-       {
-       case OP_SOD:
-       case OP_SOM:
-       case OP_SET_SOM:
-       case OP_NOT_WORD_BOUNDARY:
-       case OP_WORD_BOUNDARY:
-       case OP_EODN:
-       case OP_EOD:
-       case OP_CIRC:
-       case OP_CIRCM:
-       case OP_DOLL:
-       case OP_DOLLM:
-         /* Zero width assertions. */
-         cc++;
-         continue;
-       }
-     if (*cc != OP_BRA && *cc != OP_CBRA)
-       break;
-     /* Only a single alternative closed by a plain KET which has no
-        private data is acceptable. */
-     closing = cc + GET(cc, 1);
-     if (*closing != OP_KET || PRIVATE_DATA(closing) != 0)
-       return FALSE;
-     header = 1 + LINK_SIZE;
-     if (*cc == OP_CBRA)
-       {
-       if (common->optimized_cbracket[GET2(cc, 1 + LINK_SIZE)] == 0)
-         return FALSE;
-       header += IMM2_SIZE;
-       }
-     cc += header;
-     }
-   if (!is_accelerated_repeat(cc))
-     return FALSE;
-   common->fast_forward_bc_ptr = cc;
-   common->private_data_ptrs[(cc + 1) - common->start] = *private_data_start;
-   *private_data_start += sizeof(sljit_sw);
-   return TRUE;
- }
- /* Reserves a private data slot for the accelerated repeat that starts
-    each alternative of the bracket at cc (zero width assertions in front
-    of it are ignored). Brackets that start an alternative are processed
-    recursively while depth is greater than zero. The reserved range is
-    recorded in common->fast_fail_start_ptr / fast_fail_end_ptr. */
- static SLJIT_INLINE void detect_fast_fail(compiler_common *common, PCRE2_SPTR cc, int *private_data_start, sljit_s32 depth)
- {
-   PCRE2_SPTR closing;
-   PCRE2_SPTR following;
-   BOOL skipping;
-   SLJIT_ASSERT(*cc == OP_BRA || *cc == OP_CBRA);
-   if (*cc == OP_CBRA && common->optimized_cbracket[GET2(cc, 1 + LINK_SIZE)] == 0)
-     return;
-   /* The bracket must be closed by a plain KET without private data. */
-   closing = bracketend(cc) - (1 + LINK_SIZE);
-   if (*closing != OP_KET || PRIVATE_DATA(closing) != 0)
-     return;
-   for (;;)
-     {
-     following = cc + GET(cc, 1);
-     cc += 1 + LINK_SIZE + ((*cc == OP_CBRA) ? IMM2_SIZE : 0);
-     for (skipping = TRUE; skipping; )
-       {
-       switch(*cc)
-         {
-         case OP_SOD:
-         case OP_SOM:
-         case OP_SET_SOM:
-         case OP_NOT_WORD_BOUNDARY:
-         case OP_WORD_BOUNDARY:
-         case OP_EODN:
-         case OP_EOD:
-         case OP_CIRC:
-         case OP_CIRCM:
-         case OP_DOLL:
-         case OP_DOLLM:
-           /* Zero width assertions. */
-           cc++;
-           break;
-         default:
-           skipping = FALSE;
-           break;
-         }
-       }
-     if (depth > 0 && (*cc == OP_BRA || *cc == OP_CBRA))
-       detect_fast_fail(common, cc, private_data_start, depth - 1);
-     if (is_accelerated_repeat(cc))
-       {
-       common->private_data_ptrs[(cc + 1) - common->start] = *private_data_start;
-       if (common->fast_fail_start_ptr == 0)
-         common->fast_fail_start_ptr = *private_data_start;
-       *private_data_start += sizeof(sljit_sw);
-       common->fast_fail_end_ptr = *private_data_start;
-       /* Give up once the local area is exhausted. */
-       if (*private_data_start > SLJIT_MAX_LOCAL_SIZE)
-         return;
-       }
-     cc = following;
-     if (*cc != OP_ALT)
-       break;
-     }
- }
- /* Returns the number of private data slots (0, 1 or 2) needed by the
-    class repeat opcode at cc; 0 is returned for every other opcode. */
- static int get_class_iterator_size(PCRE2_SPTR cc)
- {
-   sljit_u32 lower;
-   sljit_u32 upper;
-   sljit_u32 extra;
-   switch(*cc)
-     {
-     case OP_CRSTAR:
-     case OP_CRPLUS:
-       return 2;
-     case OP_CRMINSTAR:
-     case OP_CRMINPLUS:
-     case OP_CRQUERY:
-     case OP_CRMINQUERY:
-       return 1;
-     case OP_CRRANGE:
-     case OP_CRMINRANGE:
-       lower = GET2(cc, 1);
-       upper = GET2(cc, 1 + IMM2_SIZE);
-       /* An upper bound of zero is sized like the star repeats above. */
-       if (upper == 0)
-         return (*cc == OP_CRRANGE) ? 2 : 1;
-       /* Otherwise the size is the optional part, capped at two. */
-       extra = upper - lower;
-       return (extra > 2) ? 2 : extra;
-     default:
-       return 0;
-     }
- }
- static BOOL detect_repeat(compiler_common *common, PCRE2_SPTR begin) /* Detects a bracket that is followed by identical copies of itself and records the repeat in private_data_ptrs; returns TRUE when a repeat is (or already was) recorded. */
- {
- PCRE2_SPTR end = bracketend(begin);
- PCRE2_SPTR next;
- PCRE2_SPTR next_end;
- PCRE2_SPTR max_end;
- PCRE2_UCHAR type;
- sljit_sw length = end - begin;
- sljit_s32 min, max, i;
- /* Detect fixed iterations first.
- Only brackets closed by a plain OP_KET are candidates. */
- if (end[-(1 + LINK_SIZE)] != OP_KET)
- return FALSE;
- /* Already detected repeat.
- The record lives in the three private_data_ptrs entries that start one
- code unit after the KET: skipped length, repeat type, repeat count. */
- if (common->private_data_ptrs[end - common->start - LINK_SIZE] != 0)
- return TRUE;
- next = end;
- min = 1;
- while (1) /* Count the copies which have the same length and the same code units as the first bracket. */
- {
- if (*next != *begin)
- break;
- next_end = bracketend(next);
- if (next_end - next != length || memcmp(begin, next, IN_UCHARS(length)) != 0)
- break;
- next = next_end;
- min++;
- }
- if (min == 2) /* Exactly two copies are not converted. */
- return FALSE;
- max = 0;
- max_end = next;
- if (*next == OP_BRAZERO || *next == OP_BRAMINZERO)
- {
- type = *next;
- while (1) /* Count the nested optional copies, each wrapped as: type, OP_BRA, copy. */
- {
- if (next[0] != type || next[1] != OP_BRA || next[2 + LINK_SIZE] != *begin)
- break;
- next_end = bracketend(next + 2 + LINK_SIZE);
- if (next_end - next != (length + 2 + LINK_SIZE) || memcmp(begin, next + 2 + LINK_SIZE, IN_UCHARS(length)) != 0)
- break;
- next = next_end;
- max++;
- }
- if (next[0] == type && next[1] == *begin && max >= 1) /* The innermost optional copy has no OP_BRA wrapper. */
- {
- next_end = bracketend(next + 1);
- if (next_end - next == (length + 1) && memcmp(begin, next + 1, IN_UCHARS(length)) == 0)
- {
- for (i = 0; i < max; i++, next_end += 1 + LINK_SIZE) /* Each wrapper must be closed by its own OP_KET. */
- if (*next_end != OP_KET)
- break;
- if (i == max)
- {
- common->private_data_ptrs[max_end - common->start - LINK_SIZE] = next_end - max_end;
- common->private_data_ptrs[max_end - common->start - LINK_SIZE + 1] = (type == OP_BRAZERO) ? OP_UPTO : OP_MINUPTO;
- /* +2 the original and the last. */
- common->private_data_ptrs[max_end - common->start - LINK_SIZE + 2] = max + 2;
- if (min == 1)
- return TRUE;
- min--; /* The last mandatory copy now belongs to the UPTO repeat, so the exact part ends one copy earlier. */
- max_end -= (1 + LINK_SIZE) + GET(max_end, -LINK_SIZE);
- }
- }
- }
- }
- if (min >= 3)
- {
- common->private_data_ptrs[end - common->start - LINK_SIZE] = max_end - end;
- common->private_data_ptrs[end - common->start - LINK_SIZE + 1] = OP_EXACT;
- common->private_data_ptrs[end - common->start - LINK_SIZE + 2] = min;
- return TRUE;
- }
- return FALSE;
- }
- #define CASE_ITERATOR_PRIVATE_DATA_1 /* Character iterators for which set_private_data_ptrs reserves one slot. */ \
- case OP_MINSTAR: \
- case OP_MINPLUS: \
- case OP_QUERY: \
- case OP_MINQUERY: \
- case OP_MINSTARI: \
- case OP_MINPLUSI: \
- case OP_QUERYI: \
- case OP_MINQUERYI: \
- case OP_NOTMINSTAR: \
- case OP_NOTMINPLUS: \
- case OP_NOTQUERY: \
- case OP_NOTMINQUERY: \
- case OP_NOTMINSTARI: \
- case OP_NOTMINPLUSI: \
- case OP_NOTQUERYI: \
- case OP_NOTMINQUERYI:
- #define CASE_ITERATOR_PRIVATE_DATA_2A /* Character iterators with two slots and no repeat count operand. */ \
- case OP_STAR: \
- case OP_PLUS: \
- case OP_STARI: \
- case OP_PLUSI: \
- case OP_NOTSTAR: \
- case OP_NOTPLUS: \
- case OP_NOTSTARI: \
- case OP_NOTPLUSI:
- #define CASE_ITERATOR_PRIVATE_DATA_2B /* Character iterators with two slots and an IMM2_SIZE repeat count operand. */ \
- case OP_UPTO: \
- case OP_MINUPTO: \
- case OP_UPTOI: \
- case OP_MINUPTOI: \
- case OP_NOTUPTO: \
- case OP_NOTMINUPTO: \
- case OP_NOTUPTOI: \
- case OP_NOTMINUPTOI:
- #define CASE_ITERATOR_TYPE_PRIVATE_DATA_1 /* Character type iterators with one slot. */ \
- case OP_TYPEMINSTAR: \
- case OP_TYPEMINPLUS: \
- case OP_TYPEQUERY: \
- case OP_TYPEMINQUERY:
- #define CASE_ITERATOR_TYPE_PRIVATE_DATA_2A /* Character type iterators with two slots and no repeat count operand. */ \
- case OP_TYPESTAR: \
- case OP_TYPEPLUS:
- #define CASE_ITERATOR_TYPE_PRIVATE_DATA_2B /* Character type iterators with a repeat count operand (not used by set_private_data_ptrs, which lists these two opcodes separately). */ \
- case OP_TYPEUPTO: \
- case OP_TYPEMINUPTO:
- /* Assigns private data (local variable) offsets to the opcodes between
-    common->start and ccend and stores them in common->private_data_ptrs,
-    indexed by the opcode position.
-    *private_data_start is the first free offset on entry and receives the
-    first free offset on return. The scan stops as soon as the offset
-    exceeds SLJIT_MAX_LOCAL_SIZE.
-    For each opcode the switch below sets:
-      space      - number of sljit_sw slots requested by an iterator,
-      size       - length of the item; a negative value marks a character
-                   iterator whose character may have UTF extra code units,
-      bracketlen - length of a bracket header. */
- static void set_private_data_ptrs(compiler_common *common, int *private_data_start, PCRE2_SPTR ccend)
- {
-   PCRE2_SPTR cc = common->start;
-   PCRE2_SPTR alternative;
-   /* Iterators located before 'end' are inside a repeated bracket. */
-   PCRE2_SPTR end = NULL;
-   int private_data_ptr = *private_data_start;
-   int space, size, bracketlen;
-   BOOL repeat_check = TRUE;
-   while (cc < ccend)
-     {
-     space = 0;
-     size = 0;
-     bracketlen = 0;
-     if (private_data_ptr > SLJIT_MAX_LOCAL_SIZE)
-       break;
-     if (repeat_check && (*cc == OP_ONCE || *cc == OP_BRA || *cc == OP_CBRA || *cc == OP_COND))
-       {
-       if (detect_repeat(common, cc))
-         {
-         /* These brackets are converted to repeats, so no global
-            based single character repeat is allowed. */
-         if (cc >= end)
-           end = bracketend(cc);
-         }
-       }
-     repeat_check = TRUE;
-     switch(*cc)
-       {
-       case OP_KET:
-         /* A non-zero entry after the KET is a repeat recorded by
-            detect_repeat: reserve its slot and skip the recorded length. */
-         if (common->private_data_ptrs[cc + 1 - common->start] != 0)
-           {
-           common->private_data_ptrs[cc - common->start] = private_data_ptr;
-           private_data_ptr += sizeof(sljit_sw);
-           cc += common->private_data_ptrs[cc + 1 - common->start];
-           }
-         cc += 1 + LINK_SIZE;
-         break;
-       case OP_ASSERT:
-       case OP_ASSERT_NOT:
-       case OP_ASSERTBACK:
-       case OP_ASSERTBACK_NOT:
-       case OP_ONCE:
-       case OP_BRAPOS:
-       case OP_SBRA:
-       case OP_SBRAPOS:
-       case OP_SCOND:
-         common->private_data_ptrs[cc - common->start] = private_data_ptr;
-         private_data_ptr += sizeof(sljit_sw);
-         bracketlen = 1 + LINK_SIZE;
-         break;
-       case OP_CBRAPOS:
-       case OP_SCBRAPOS:
-         common->private_data_ptrs[cc - common->start] = private_data_ptr;
-         private_data_ptr += sizeof(sljit_sw);
-         bracketlen = 1 + LINK_SIZE + IMM2_SIZE;
-         break;
-       case OP_COND:
-         /* Might be a hidden SCOND. */
-         alternative = cc + GET(cc, 1);
-         if (*alternative == OP_KETRMAX || *alternative == OP_KETRMIN)
-           {
-           common->private_data_ptrs[cc - common->start] = private_data_ptr;
-           private_data_ptr += sizeof(sljit_sw);
-           }
-         bracketlen = 1 + LINK_SIZE;
-         break;
-       case OP_BRA:
-         bracketlen = 1 + LINK_SIZE;
-         break;
-       case OP_CBRA:
-       case OP_SCBRA:
-         bracketlen = 1 + LINK_SIZE + IMM2_SIZE;
-         break;
-       case OP_BRAZERO:
-       case OP_BRAMINZERO:
-       case OP_BRAPOSZERO:
-         /* The bracket after a zero prefix is not checked for repeats. */
-         repeat_check = FALSE;
-         size = 1;
-         break;
-       CASE_ITERATOR_PRIVATE_DATA_1
-         space = 1;
-         size = -2;
-         break;
-       CASE_ITERATOR_PRIVATE_DATA_2A
-         space = 2;
-         size = -2;
-         break;
-       CASE_ITERATOR_PRIVATE_DATA_2B
-         space = 2;
-         size = -(2 + IMM2_SIZE);
-         break;
-       CASE_ITERATOR_TYPE_PRIVATE_DATA_1
-         space = 1;
-         size = 1;
-         break;
-       CASE_ITERATOR_TYPE_PRIVATE_DATA_2A
-         if (cc[1] != OP_ANYNL && cc[1] != OP_EXTUNI)
-           space = 2;
-         size = 1;
-         break;
-       case OP_TYPEUPTO:
-         if (cc[1 + IMM2_SIZE] != OP_ANYNL && cc[1 + IMM2_SIZE] != OP_EXTUNI)
-           space = 2;
-         size = 1 + IMM2_SIZE;
-         break;
-       case OP_TYPEMINUPTO:
-         space = 2;
-         size = 1 + IMM2_SIZE;
-         break;
-       case OP_CLASS:
-       case OP_NCLASS:
-         /* The repeat opcode, if any, follows the class data, so size
-            must be known before the iterator is looked up. */
-         size = 1 + 32 / sizeof(PCRE2_UCHAR);
-         space = get_class_iterator_size(cc + size);
-         break;
- #if defined SUPPORT_UNICODE || PCRE2_CODE_UNIT_WIDTH != 8
-       case OP_XCLASS:
-         /* Same as above; the length is stored after the opcode. */
-         size = GET(cc, 1);
-         space = get_class_iterator_size(cc + size);
-         break;
- #endif
-       default:
-         cc = next_opcode(common, cc);
-         SLJIT_ASSERT(cc != NULL);
-         break;
-       }
-     /* Character iterators, which are not inside a repeated bracket,
-        gets a private slot instead of allocating it on the stack. */
-     if (space > 0 && cc >= end)
-       {
-       common->private_data_ptrs[cc - common->start] = private_data_ptr;
-       private_data_ptr += sizeof(sljit_sw) * space;
-       }
-     if (size != 0)
-       {
-       if (size < 0)
-         {
-         cc += -size;
- #ifdef SUPPORT_UNICODE
-         if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
- #endif
-         }
-       else
-         cc += size;
-       }
-     if (bracketlen > 0)
-       {
-       if (cc >= end)
-         {
-         /* A bracket closed by a plain KET is not repeated, so
-            iterators inside it may still use private slots. */
-         end = bracketend(cc);
-         if (end[-1 - LINK_SIZE] == OP_KET)
-           end = NULL;
-         }
-       cc += bracketlen;
-       }
-     }
-   *private_data_start = private_data_ptr;
- }
- /* Returns with a frame_types (always < 0) if no need for frame.
- Otherwise the return value is the counted frame length plus one, where
- the length is 2 for each saved start-of-match, mark and last-capture
- value and 3 for each capturing bracket found between cc and ccend.
- When ccend is NULL, cc must point to a bracket opcode and the scan
- covers that bracket up to its closing opcode.
- With recursive set, start-of-match and mark are treated as already
- counted. *needs_control_head is set to TRUE when a mark-type verb or
- OP_THEN is found while common->control_head_ptr is non-zero. */
- static int get_framesize(compiler_common *common, PCRE2_SPTR cc, PCRE2_SPTR ccend, BOOL recursive, BOOL *needs_control_head)
- {
- int length = 0;
- int possessive = 0;
- BOOL stack_restore = FALSE;
- BOOL setsom_found = recursive;
- BOOL setmark_found = recursive;
- /* The last capture is a local variable even for recursions. */
- BOOL capture_last_found = FALSE;
- #if defined DEBUG_FORCE_CONTROL_HEAD && DEBUG_FORCE_CONTROL_HEAD
- SLJIT_ASSERT(common->control_head_ptr != 0);
- *needs_control_head = TRUE;
- #else
- *needs_control_head = FALSE;
- #endif
- if (ccend == NULL)
- {
- ccend = bracketend(cc) - (1 + LINK_SIZE);
- if (!recursive && (*cc == OP_CBRAPOS || *cc == OP_SCBRAPOS))
- {
- possessive = length = (common->capture_last_ptr != 0) ? 5 : 3;
- /* This is correct regardless of common->capture_last_ptr. */
- capture_last_found = TRUE;
- }
- cc = next_opcode(common, cc);
- }
- SLJIT_ASSERT(cc != NULL);
- while (cc < ccend)
- switch(*cc)
- {
- case OP_SET_SOM:
- SLJIT_ASSERT(common->has_set_som);
- stack_restore = TRUE;
- if (!setsom_found)
- {
- length += 2;
- setsom_found = TRUE;
- }
- cc += 1;
- break;
- case OP_MARK:
- case OP_PRUNE_ARG:
- case OP_THEN_ARG:
- SLJIT_ASSERT(common->mark_ptr != 0);
- stack_restore = TRUE;
- if (!setmark_found)
- {
- length += 2;
- setmark_found = TRUE;
- }
- if (common->control_head_ptr != 0)
- *needs_control_head = TRUE;
- cc += 1 + 2 + cc[1];
- break;
- case OP_RECURSE: /* Counts every value that is in use by the pattern and not counted yet. */
- stack_restore = TRUE;
- if (common->has_set_som && !setsom_found)
- {
- length += 2;
- setsom_found = TRUE;
- }
- if (common->mark_ptr != 0 && !setmark_found)
- {
- length += 2;
- setmark_found = TRUE;
- }
- if (common->capture_last_ptr != 0 && !capture_last_found)
- {
- length += 2;
- capture_last_found = TRUE;
- }
- cc += 1 + LINK_SIZE;
- break;
- case OP_CBRA:
- case OP_CBRAPOS:
- case OP_SCBRA:
- case OP_SCBRAPOS:
- stack_restore = TRUE;
- if (common->capture_last_ptr != 0 && !capture_last_found)
- {
- length += 2;
- capture_last_found = TRUE;
- }
- length += 3;
- cc += 1 + LINK_SIZE + IMM2_SIZE;
- break;
- case OP_THEN:
- stack_restore = TRUE;
- if (common->control_head_ptr != 0)
- *needs_control_head = TRUE;
- cc ++;
- break;
- default:
- stack_restore = TRUE;
- /* Fall through.
- The opcodes listed below are skipped without setting stack_restore;
- every other opcode reaches default and sets it first. */
- case OP_NOT_WORD_BOUNDARY:
- case OP_WORD_BOUNDARY:
- case OP_NOT_DIGIT:
- case OP_DIGIT:
- case OP_NOT_WHITESPACE:
- case OP_WHITESPACE:
- case OP_NOT_WORDCHAR:
- case OP_WORDCHAR:
- case OP_ANY:
- case OP_ALLANY:
- case OP_ANYBYTE:
- case OP_NOTPROP:
- case OP_PROP:
- case OP_ANYNL:
- case OP_NOT_HSPACE:
- case OP_HSPACE:
- case OP_NOT_VSPACE:
- case OP_VSPACE:
- case OP_EXTUNI:
- case OP_EODN:
- case OP_EOD:
- case OP_CIRC:
- case OP_CIRCM:
- case OP_DOLL:
- case OP_DOLLM:
- case OP_CHAR:
- case OP_CHARI:
- case OP_NOT:
- case OP_NOTI:
- case OP_EXACT:
- case OP_POSSTAR:
- case OP_POSPLUS:
- case OP_POSQUERY:
- case OP_POSUPTO:
- case OP_EXACTI:
- case OP_POSSTARI:
- case OP_POSPLUSI:
- case OP_POSQUERYI:
- case OP_POSUPTOI:
- case OP_NOTEXACT:
- case OP_NOTPOSSTAR:
- case OP_NOTPOSPLUS:
- case OP_NOTPOSQUERY:
- case OP_NOTPOSUPTO:
- case OP_NOTEXACTI:
- case OP_NOTPOSSTARI:
- case OP_NOTPOSPLUSI:
- case OP_NOTPOSQUERYI:
- case OP_NOTPOSUPTOI:
- case OP_TYPEEXACT:
- case OP_TYPEPOSSTAR:
- case OP_TYPEPOSPLUS:
- case OP_TYPEPOSQUERY:
- case OP_TYPEPOSUPTO:
- case OP_CLASS:
- case OP_NCLASS:
- case OP_XCLASS:
- case OP_CALLOUT:
- case OP_CALLOUT_STR:
- cc = next_opcode(common, cc);
- SLJIT_ASSERT(cc != NULL);
- break;
- }
- /* Possessive quantifiers can use a special case.
- Taken when nothing was counted beyond the initial allowance of a
- CBRAPOS / SCBRAPOS bracket (and also when both values are still 0). */
- if (SLJIT_UNLIKELY(possessive == length))
- return stack_restore ? no_frame : no_stack;
- if (length > 0)
- return length + 1;
- return stack_restore ? no_frame : no_stack;
- }
- static void init_frame(compiler_common *common, PCRE2_SPTR cc, PCRE2_SPTR ccend, int stackpos, int stacktop)
- {
- DEFINE_COMPILER;
- BOOL setsom_found = FALSE;
- BOOL setmark_found = FALSE;
- /* The last capture is a local variable even for recursions. */
- BOOL capture_last_found = FALSE;
- int offset;
- /* >= 1 + shortest item size (2) */
- SLJIT_UNUSED_ARG(stacktop);
- SLJIT_ASSERT(stackpos >= stacktop + 2);
- stackpos = STACK(stackpos);
- if (ccend == NULL)
- {
- ccend = bracketend(cc) - (1 + LINK_SIZE);
- if (*cc != OP_CBRAPOS && *cc != OP_SCBRAPOS)
- cc = next_opcode(common, cc);
- }
- SLJIT_ASSERT(cc != NULL);
- while (cc < ccend)
- switch(*cc)
- {
- case OP_SET_SOM:
- SLJIT_ASSERT(common->has_set_som);
- if (!setsom_found)
- {
- OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(0));
- OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, -OVECTOR(0));
- stackpos -= (int)sizeof(sljit_sw);
- OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0);
- stackpos -= (int)sizeof(sljit_sw);
- setsom_found = TRUE;
- }
- cc += 1;
- break;
- case OP_MARK:
- case OP_PRUNE_ARG:
- case OP_THEN_ARG:
- SLJIT_ASSERT(common->mark_ptr != 0);
- if (!setmark_found)
- {
- OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->mark_ptr);
- OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, -common->mark_ptr);
- stackpos -= (int)sizeof(sljit_sw);
- OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0);
- stackpos -= (int)sizeof(sljit_sw);
- setmark_found = TRUE;
- }
- cc += 1 + 2 + cc[1];
- break;
- case OP_RECURSE:
- if (common->has_set_som && !setsom_found)
- {
- OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(0));
- OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, -OVECTOR(0));
- stackpos -= (int)sizeof(sljit_sw);
- OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0);
- stackpos -= (int)sizeof(sljit_sw);
- setsom_found = TRUE;
- }
- if (common->mark_ptr != 0 && !setmark_found)
- {
- OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->mark_ptr);
- OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, -common->mark_ptr);
- stackpos -= (int)sizeof(sljit_sw);
- OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0);
- stackpos -= (int)sizeof(sljit_sw);
- setmark_found = TRUE;
- }
- if (common->capture_last_ptr != 0 && !capture_last_found)
- {
- OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->capture_last_ptr);
- OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, -common->capture_last_ptr);
- stackpos -= (int)sizeof(sljit_sw);
- OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0);
- stackpos -= (int)sizeof(sljit_sw);
- capture_last_found = TRUE;
- }
- cc += 1 + LINK_SIZE;
- break;
- case OP_CBRA:
- case OP_CBRAPOS:
- case OP_SCBRA:
- case OP_SCBRAPOS:
- if (common->capture_last_ptr != 0 && !capture_last_found)
- {
- OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->capture_last_ptr);
- OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, -common->capture_last_ptr);
- stackpos -= (int)sizeof(sljit_sw);
- OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0);
- stackpos -= (int)sizeof(sljit_sw);
- capture_last_found = TRUE;
- }
- offset = (GET2(cc, 1 + LINK_SIZE)) << 1;
- OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, OVECTOR(offset));
- stackpos -= (int)sizeof(sljit_sw);
- OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset));
- OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1));
- OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0);
- stackpos -= (int)sizeof(sljit_sw);
- OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP2, 0);
- stackpos -= (int)sizeof(sljit_sw);
- cc += 1 + LINK_SIZE + IMM2_SIZE;
- break;
- default:
- cc = next_opcode(common, cc);
- SLJIT_ASSERT(cc != NULL);
- break;
- }
- OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, 0);
- SLJIT_ASSERT(stackpos == STACK(stacktop));
- }
/* Number of temporary register slots used by the delayed memory copy. */
#define RECURSE_TMP_REG_COUNT 3

/* Bookkeeping of a delayed memory-to-memory copy. Every slot remembers
where the value which is currently loaded into its temporary register
still has to be stored. */
struct delayed_mem_copy_status {
  /* Compiler which receives the emitted moves. */
  struct sljit_compiler *compiler;
  /* Base register of the pending store of each slot, or -1 when the
     slot has no pending store. */
  int store_bases[RECURSE_TMP_REG_COUNT];
  /* Offset of the pending store of each slot. */
  int store_offsets[RECURSE_TMP_REG_COUNT];
  /* Register which carries the value of each slot. */
  int tmp_regs[RECURSE_TMP_REG_COUNT];
  /* Register where the previous content of tmp_regs[] is preserved
     when it is a virtual register. */
  int saved_tmp_regs[RECURSE_TMP_REG_COUNT];
  /* Index of the slot used by the next move. */
  int next_tmp_reg;
};

typedef struct delayed_mem_copy_status delayed_mem_copy_status;
- static void delayed_mem_copy_init(delayed_mem_copy_status *status, compiler_common *common)
- {
- int i;
- for (i = 0; i < RECURSE_TMP_REG_COUNT; i++)
- {
- SLJIT_ASSERT(status->tmp_regs[i] >= 0);
- SLJIT_ASSERT(sljit_get_register_index(status->saved_tmp_regs[i]) < 0 || status->tmp_regs[i] == status->saved_tmp_regs[i]);
- status->store_bases[i] = -1;
- }
- status->next_tmp_reg = 0;
- status->compiler = common->compiler;
- }
- static void delayed_mem_copy_move(delayed_mem_copy_status *status, int load_base, sljit_sw load_offset,
- int store_base, sljit_sw store_offset)
- {
- struct sljit_compiler *compiler = status->compiler;
- int next_tmp_reg = status->next_tmp_reg;
- int tmp_reg = status->tmp_regs[next_tmp_reg];
- SLJIT_ASSERT(load_base > 0 && store_base > 0);
- if (status->store_bases[next_tmp_reg] == -1)
- {
- /* Preserve virtual registers. */
- if (sljit_get_register_index(status->saved_tmp_regs[next_tmp_reg]) < 0)
- OP1(SLJIT_MOV, status->saved_tmp_regs[next_tmp_reg], 0, tmp_reg, 0);
- }
- else
- OP1(SLJIT_MOV, SLJIT_MEM1(status->store_bases[next_tmp_reg]), status->store_offsets[next_tmp_reg], tmp_reg, 0);
- OP1(SLJIT_MOV, tmp_reg, 0, SLJIT_MEM1(load_base), load_offset);
- status->store_bases[next_tmp_reg] = store_base;
- status->store_offsets[next_tmp_reg] = store_offset;
- status->next_tmp_reg = (next_tmp_reg + 1) % RECURSE_TMP_REG_COUNT;
- }
- static void delayed_mem_copy_finish(delayed_mem_copy_status *status)
- {
- struct sljit_compiler *compiler = status->compiler;
- int next_tmp_reg = status->next_tmp_reg;
- int tmp_reg, saved_tmp_reg, i;
- for (i = 0; i < RECURSE_TMP_REG_COUNT; i++)
- {
- if (status->store_bases[next_tmp_reg] != -1)
- {
- tmp_reg = status->tmp_regs[next_tmp_reg];
- saved_tmp_reg = status->saved_tmp_regs[next_tmp_reg];
- OP1(SLJIT_MOV, SLJIT_MEM1(status->store_bases[next_tmp_reg]), status->store_offsets[next_tmp_reg], tmp_reg, 0);
- /* Restore virtual registers. */
- if (sljit_get_register_index(saved_tmp_reg) < 0)
- OP1(SLJIT_MOV, tmp_reg, 0, saved_tmp_reg, 0);
- }
- next_tmp_reg = (next_tmp_reg + 1) % RECURSE_TMP_REG_COUNT;
- }
- }
- #undef RECURSE_TMP_REG_COUNT
- static int get_recurse_data_length(compiler_common *common, PCRE2_SPTR cc, PCRE2_SPTR ccend,
- BOOL *needs_control_head, BOOL *has_quit, BOOL *has_accept)
- {
- int length = 1;
- int size;
- PCRE2_SPTR alternative;
- BOOL quit_found = FALSE;
- BOOL accept_found = FALSE;
- BOOL setsom_found = FALSE;
- BOOL setmark_found = FALSE;
- BOOL capture_last_found = FALSE;
- BOOL control_head_found = FALSE;
- #if defined DEBUG_FORCE_CONTROL_HEAD && DEBUG_FORCE_CONTROL_HEAD
- SLJIT_ASSERT(common->control_head_ptr != 0);
- control_head_found = TRUE;
- #endif
- /* Calculate the sum of the private machine words. */
- while (cc < ccend)
- {
- size = 0;
- switch(*cc)
- {
- case OP_SET_SOM:
- SLJIT_ASSERT(common->has_set_som);
- setsom_found = TRUE;
- cc += 1;
- break;
- case OP_RECURSE:
- if (common->has_set_som)
- setsom_found = TRUE;
- if (common->mark_ptr != 0)
- setmark_found = TRUE;
- if (common->capture_last_ptr != 0)
- capture_last_found = TRUE;
- cc += 1 + LINK_SIZE;
- break;
- case OP_KET:
- if (PRIVATE_DATA(cc) != 0)
- {
- length++;
- SLJIT_ASSERT(PRIVATE_DATA(cc + 1) != 0);
- cc += PRIVATE_DATA(cc + 1);
- }
- cc += 1 + LINK_SIZE;
- break;
- case OP_ASSERT:
- case OP_ASSERT_NOT:
- case OP_ASSERTBACK:
- case OP_ASSERTBACK_NOT:
- case OP_ONCE:
- case OP_BRAPOS:
- case OP_SBRA:
- case OP_SBRAPOS:
- case OP_SCOND:
- length++;
- SLJIT_ASSERT(PRIVATE_DATA(cc) != 0);
- cc += 1 + LINK_SIZE;
- break;
- case OP_CBRA:
- case OP_SCBRA:
- length += 2;
- if (common->capture_last_ptr != 0)
- capture_last_found = TRUE;
- if (common->optimized_cbracket[GET2(cc, 1 + LINK_SIZE)] == 0)
- length++;
- cc += 1 + LINK_SIZE + IMM2_SIZE;
- break;
- case OP_CBRAPOS:
- case OP_SCBRAPOS:
- length += 2 + 2;
- if (common->capture_last_ptr != 0)
- capture_last_found = TRUE;
- cc += 1 + LINK_SIZE + IMM2_SIZE;
- break;
- case OP_COND:
- /* Might be a hidden SCOND. */
- alternative = cc + GET(cc, 1);
- if (*alternative == OP_KETRMAX || *alternative == OP_KETRMIN)
- length++;
- cc += 1 + LINK_SIZE;
- break;
- CASE_ITERATOR_PRIVATE_DATA_1
- if (PRIVATE_DATA(cc) != 0)
- length++;
- cc += 2;
- #ifdef SUPPORT_UNICODE
- if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
- #endif
- break;
- CASE_ITERATOR_PRIVATE_DATA_2A
- if (PRIVATE_DATA(cc) != 0)
- length += 2;
- cc += 2;
- #ifdef SUPPORT_UNICODE
- if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
- #endif
- break;
- CASE_ITERATOR_PRIVATE_DATA_2B
- if (PRIVATE_DATA(cc) != 0)
- length += 2;
- cc += 2 + IMM2_SIZE;
- #ifdef SUPPORT_UNICODE
- if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
- #endif
- break;
- CASE_ITERATOR_TYPE_PRIVATE_DATA_1
- if (PRIVATE_DATA(cc) != 0)
- length++;
- cc += 1;
- break;
- CASE_ITERATOR_TYPE_PRIVATE_DATA_2A
- if (PRIVATE_DATA(cc) != 0)
- length += 2;
- cc += 1;
- break;
- CASE_ITERATOR_TYPE_PRIVATE_DATA_2B
- if (PRIVATE_DATA(cc) != 0)
- length += 2;
- cc += 1 + IMM2_SIZE;
- break;
- case OP_CLASS:
- case OP_NCLASS:
- #if defined SUPPORT_UNICODE || PCRE2_CODE_UNIT_WIDTH != 8
- case OP_XCLASS:
- size = (*cc == OP_XCLASS) ? GET(cc, 1) : 1 + 32 / (int)sizeof(PCRE2_UCHAR);
- #else
- size = 1 + 32 / (int)sizeof(PCRE2_UCHAR);
- #endif
- if (PRIVATE_DATA(cc) != 0)
- length += get_class_iterator_size(cc + size);
- cc += size;
- break;
- case OP_MARK:
- case OP_PRUNE_ARG:
- case OP_THEN_ARG:
- SLJIT_ASSERT(common->mark_ptr != 0);
- if (!setmark_found)
- setmark_found = TRUE;
- if (common->control_head_ptr != 0)
- control_head_found = TRUE;
- if (*cc != OP_MARK)
- quit_found = TRUE;
- cc += 1 + 2 + cc[1];
- break;
- case OP_PRUNE:
- case OP_SKIP:
- case OP_COMMIT:
- quit_found = TRUE;
- cc++;
- break;
- case OP_SKIP_ARG:
- quit_found = TRUE;
- cc += 1 + 2 + cc[1];
- break;
- case OP_THEN:
- SLJIT_ASSERT(common->control_head_ptr != 0);
- quit_found = TRUE;
- if (!control_head_found)
- control_head_found = TRUE;
- cc++;
- break;
- case OP_ACCEPT:
- case OP_ASSERT_ACCEPT:
- accept_found = TRUE;
- cc++;
- break;
- default:
- cc = next_opcode(common, cc);
- SLJIT_ASSERT(cc != NULL);
- break;
- }
- }
- SLJIT_ASSERT(cc == ccend);
- if (control_head_found)
- length++;
- if (capture_last_found)
- length++;
- if (quit_found)
- {
- if (setsom_found)
- length++;
- if (setmark_found)
- length++;
- }
- *needs_control_head = control_head_found;
- *has_quit = quit_found;
- *has_accept = accept_found;
- return length;
- }
/* Transfer modes understood by copy_recurse_data(). */
enum copy_recurse_data_types {
  /* Stack-local values are copied into the save area. */
  recurse_copy_from_global = 0,
  /* Only the private values are copied back from the save area. */
  recurse_copy_private_to_global = 1,
  /* Only the shared values are copied back from the save area. */
  recurse_copy_shared_to_global = 2,
  /* Only the kept shared values are copied back from the save area. */
  recurse_copy_kept_shared_to_global = 3,
  /* Private and shared values are exchanged with the save area. */
  recurse_swap_global = 4
};
- static void copy_recurse_data(compiler_common *common, PCRE2_SPTR cc, PCRE2_SPTR ccend,
- int type, int stackptr, int stacktop, BOOL has_quit)
- {
- delayed_mem_copy_status status;
- PCRE2_SPTR alternative;
- sljit_sw private_srcw[2];
- sljit_sw shared_srcw[3];
- sljit_sw kept_shared_srcw[2];
- int private_count, shared_count, kept_shared_count;
- int from_sp, base_reg, offset, i;
- BOOL setsom_found = FALSE;
- BOOL setmark_found = FALSE;
- BOOL capture_last_found = FALSE;
- BOOL control_head_found = FALSE;
- #if defined DEBUG_FORCE_CONTROL_HEAD && DEBUG_FORCE_CONTROL_HEAD
- SLJIT_ASSERT(common->control_head_ptr != 0);
- control_head_found = TRUE;
- #endif
- switch (type)
- {
- case recurse_copy_from_global:
- from_sp = TRUE;
- base_reg = STACK_TOP;
- break;
- case recurse_copy_private_to_global:
- case recurse_copy_shared_to_global:
- case recurse_copy_kept_shared_to_global:
- from_sp = FALSE;
- base_reg = STACK_TOP;
- break;
- default:
- SLJIT_ASSERT(type == recurse_swap_global);
- from_sp = FALSE;
- base_reg = TMP2;
- break;
- }
- stackptr = STACK(stackptr);
- stacktop = STACK(stacktop);
- status.tmp_regs[0] = TMP1;
- status.saved_tmp_regs[0] = TMP1;
- if (base_reg != TMP2)
- {
- status.tmp_regs[1] = TMP2;
- status.saved_tmp_regs[1] = TMP2;
- }
- else
- {
- status.saved_tmp_regs[1] = RETURN_ADDR;
- if (sljit_get_register_index (RETURN_ADDR) == -1)
- status.tmp_regs[1] = STR_PTR;
- else
- status.tmp_regs[1] = RETURN_ADDR;
- }
- status.saved_tmp_regs[2] = TMP3;
- if (sljit_get_register_index (TMP3) == -1)
- status.tmp_regs[2] = STR_END;
- else
- status.tmp_regs[2] = TMP3;
- delayed_mem_copy_init(&status, common);
- if (type != recurse_copy_shared_to_global && type != recurse_copy_kept_shared_to_global)
- {
- SLJIT_ASSERT(type == recurse_copy_from_global || type == recurse_copy_private_to_global || type == recurse_swap_global);
- if (!from_sp)
- delayed_mem_copy_move(&status, base_reg, stackptr, SLJIT_SP, common->recursive_head_ptr);
- if (from_sp || type == recurse_swap_global)
- delayed_mem_copy_move(&status, SLJIT_SP, common->recursive_head_ptr, base_reg, stackptr);
- }
- stackptr += sizeof(sljit_sw);
- #if defined DEBUG_FORCE_CONTROL_HEAD && DEBUG_FORCE_CONTROL_HEAD
- if (type != recurse_copy_shared_to_global)
- {
- if (!from_sp)
- delayed_mem_copy_move(&status, base_reg, stackptr, SLJIT_SP, common->control_head_ptr);
- if (from_sp || type == recurse_swap_global)
- delayed_mem_copy_move(&status, SLJIT_SP, common->control_head_ptr, base_reg, stackptr);
- }
- stackptr += sizeof(sljit_sw);
- #endif
- while (cc < ccend)
- {
- private_count = 0;
- shared_count = 0;
- kept_shared_count = 0;
- switch(*cc)
- {
- case OP_SET_SOM:
- SLJIT_ASSERT(common->has_set_som);
- if (has_quit && !setsom_found)
- {
- kept_shared_srcw[0] = OVECTOR(0);
- kept_shared_count = 1;
- setsom_found = TRUE;
- }
- cc += 1;
- break;
- case OP_RECURSE:
- if (has_quit)
- {
- if (common->has_set_som && !setsom_found)
- {
- kept_shared_srcw[0] = OVECTOR(0);
- kept_shared_count = 1;
- setsom_found = TRUE;
- }
- if (common->mark_ptr != 0 && !setmark_found)
- {
- kept_shared_srcw[kept_shared_count] = common->mark_ptr;
- kept_shared_count++;
- setmark_found = TRUE;
- }
- }
- if (common->capture_last_ptr != 0 && !capture_last_found)
- {
- shared_srcw[0] = common->capture_last_ptr;
- shared_count = 1;
- capture_last_found = TRUE;
- }
- cc += 1 + LINK_SIZE;
- break;
- case OP_KET:
- if (PRIVATE_DATA(cc) != 0)
- {
- private_count = 1;
- private_srcw[0] = PRIVATE_DATA(cc);
- SLJIT_ASSERT(PRIVATE_DATA(cc + 1) != 0);
- cc += PRIVATE_DATA(cc + 1);
- }
- cc += 1 + LINK_SIZE;
- break;
- case OP_ASSERT:
- case OP_ASSERT_NOT:
- case OP_ASSERTBACK:
- case OP_ASSERTBACK_NOT:
- case OP_ONCE:
- case OP_BRAPOS:
- case OP_SBRA:
- case OP_SBRAPOS:
- case OP_SCOND:
- private_count = 1;
- private_srcw[0] = PRIVATE_DATA(cc);
- cc += 1 + LINK_SIZE;
- break;
- case OP_CBRA:
- case OP_SCBRA:
- offset = (GET2(cc, 1 + LINK_SIZE)) << 1;
- shared_srcw[0] = OVECTOR(offset);
- shared_srcw[1] = OVECTOR(offset + 1);
- shared_count = 2;
- if (common->capture_last_ptr != 0 && !capture_last_found)
- {
- shared_srcw[2] = common->capture_last_ptr;
- shared_count = 3;
- capture_last_found = TRUE;
- }
- if (common->optimized_cbracket[GET2(cc, 1 + LINK_SIZE)] == 0)
- {
- private_count = 1;
- private_srcw[0] = OVECTOR_PRIV(GET2(cc, 1 + LINK_SIZE));
- }
- cc += 1 + LINK_SIZE + IMM2_SIZE;
- break;
- case OP_CBRAPOS:
- case OP_SCBRAPOS:
- offset = (GET2(cc, 1 + LINK_SIZE)) << 1;
- shared_srcw[0] = OVECTOR(offset);
- shared_srcw[1] = OVECTOR(offset + 1);
- shared_count = 2;
- if (common->capture_last_ptr != 0 && !capture_last_found)
- {
- shared_srcw[2] = common->capture_last_ptr;
- shared_count = 3;
- capture_last_found = TRUE;
- }
- private_count = 2;
- private_srcw[0] = PRIVATE_DATA(cc);
- private_srcw[1] = OVECTOR_PRIV(GET2(cc, 1 + LINK_SIZE));
- cc += 1 + LINK_SIZE + IMM2_SIZE;
- break;
- case OP_COND:
- /* Might be a hidden SCOND. */
- alternative = cc + GET(cc, 1);
- if (*alternative == OP_KETRMAX || *alternative == OP_KETRMIN)
- {
- private_count = 1;
- private_srcw[0] = PRIVATE_DATA(cc);
- }
- cc += 1 + LINK_SIZE;
- break;
- CASE_ITERATOR_PRIVATE_DATA_1
- if (PRIVATE_DATA(cc))
- {
- private_count = 1;
- private_srcw[0] = PRIVATE_DATA(cc);
- }
- cc += 2;
- #ifdef SUPPORT_UNICODE
- if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
- #endif
- break;
- CASE_ITERATOR_PRIVATE_DATA_2A
- if (PRIVATE_DATA(cc))
- {
- private_count = 2;
- private_srcw[0] = PRIVATE_DATA(cc);
- private_srcw[1] = PRIVATE_DATA(cc) + sizeof(sljit_sw);
- }
- cc += 2;
- #ifdef SUPPORT_UNICODE
- if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
- #endif
- break;
- CASE_ITERATOR_PRIVATE_DATA_2B
- if (PRIVATE_DATA(cc))
- {
- private_count = 2;
- private_srcw[0] = PRIVATE_DATA(cc);
- private_srcw[1] = PRIVATE_DATA(cc) + sizeof(sljit_sw);
- }
- cc += 2 + IMM2_SIZE;
- #ifdef SUPPORT_UNICODE
- if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
- #endif
- break;
- CASE_ITERATOR_TYPE_PRIVATE_DATA_1
- if (PRIVATE_DATA(cc))
- {
- private_count = 1;
- private_srcw[0] = PRIVATE_DATA(cc);
- }
- cc += 1;
- break;
- CASE_ITERATOR_TYPE_PRIVATE_DATA_2A
- if (PRIVATE_DATA(cc))
- {
- private_count = 2;
- private_srcw[0] = PRIVATE_DATA(cc);
- private_srcw[1] = private_srcw[0] + sizeof(sljit_sw);
- }
- cc += 1;
- break;
- CASE_ITERATOR_TYPE_PRIVATE_DATA_2B
- if (PRIVATE_DATA(cc))
- {
- private_count = 2;
- private_srcw[0] = PRIVATE_DATA(cc);
- private_srcw[1] = private_srcw[0] + sizeof(sljit_sw);
- }
- cc += 1 + IMM2_SIZE;
- break;
- case OP_CLASS:
- case OP_NCLASS:
- #if defined SUPPORT_UNICODE || PCRE2_CODE_UNIT_WIDTH != 8
- case OP_XCLASS:
- i = (*cc == OP_XCLASS) ? GET(cc, 1) : 1 + 32 / (int)sizeof(PCRE2_UCHAR);
- #else
- i = 1 + 32 / (int)sizeof(PCRE2_UCHAR);
- #endif
- if (PRIVATE_DATA(cc) != 0)
- switch(get_class_iterator_size(cc + i))
- {
- case 1:
- private_count = 1;
- private_srcw[0] = PRIVATE_DATA(cc);
- break;
- case 2:
- private_count = 2;
- private_srcw[0] = PRIVATE_DATA(cc);
- private_srcw[1] = private_srcw[0] + sizeof(sljit_sw);
- break;
- default:
- SLJIT_UNREACHABLE();
- break;
- }
- cc += i;
- break;
- case OP_MARK:
- case OP_PRUNE_ARG:
- case OP_THEN_ARG:
- SLJIT_ASSERT(common->mark_ptr != 0);
- if (has_quit && !setmark_found)
- {
- kept_shared_srcw[0] = common->mark_ptr;
- kept_shared_count = 1;
- setmark_found = TRUE;
- }
- if (common->control_head_ptr != 0 && !control_head_found)
- {
- shared_srcw[0] = common->control_head_ptr;
- shared_count = 1;
- control_head_found = TRUE;
- }
- cc += 1 + 2 + cc[1];
- break;
- case OP_THEN:
- SLJIT_ASSERT(common->control_head_ptr != 0);
- if (!control_head_found)
- {
- shared_srcw[0] = common->control_head_ptr;
- shared_count = 1;
- control_head_found = TRUE;
- }
- cc++;
- break;
- default:
- cc = next_opcode(common, cc);
- SLJIT_ASSERT(cc != NULL);
- break;
- }
- if (type != recurse_copy_shared_to_global && type != recurse_copy_kept_shared_to_global)
- {
- SLJIT_ASSERT(type == recurse_copy_from_global || type == recurse_copy_private_to_global || type == recurse_swap_global);
- for (i = 0; i < private_count; i++)
- {
- SLJIT_ASSERT(private_srcw[i] != 0);
- if (!from_sp)
- delayed_mem_copy_move(&status, base_reg, stackptr, SLJIT_SP, private_srcw[i]);
- if (from_sp || type == recurse_swap_global)
- delayed_mem_copy_move(&status, SLJIT_SP, private_srcw[i], base_reg, stackptr);
- stackptr += sizeof(sljit_sw);
- }
- }
- else
- stackptr += sizeof(sljit_sw) * private_count;
- if (type != recurse_copy_private_to_global && type != recurse_copy_kept_shared_to_global)
- {
- SLJIT_ASSERT(type == recurse_copy_from_global || type == recurse_copy_shared_to_global || type == recurse_swap_global);
- for (i = 0; i < shared_count; i++)
- {
- SLJIT_ASSERT(shared_srcw[i] != 0);
- if (!from_sp)
- delayed_mem_copy_move(&status, base_reg, stackptr, SLJIT_SP, shared_srcw[i]);
- if (from_sp || type == recurse_swap_global)
- delayed_mem_copy_move(&status, SLJIT_SP, shared_srcw[i], base_reg, stackptr);
- stackptr += sizeof(sljit_sw);
- }
- }
- else
- stackptr += sizeof(sljit_sw) * shared_count;
- if (type != recurse_copy_private_to_global && type != recurse_swap_global)
- {
- SLJIT_ASSERT(type == recurse_copy_from_global || type == recurse_copy_shared_to_global || type == recurse_copy_kept_shared_to_global);
- for (i = 0; i < kept_shared_count; i++)
- {
- SLJIT_ASSERT(kept_shared_srcw[i] != 0);
- if (!from_sp)
- delayed_mem_copy_move(&status, base_reg, stackptr, SLJIT_SP, kept_shared_srcw[i]);
- if (from_sp || type == recurse_swap_global)
- delayed_mem_copy_move(&status, SLJIT_SP, kept_shared_srcw[i], base_reg, stackptr);
- stackptr += sizeof(sljit_sw);
- }
- }
- else
- stackptr += sizeof(sljit_sw) * kept_shared_count;
- }
- SLJIT_ASSERT(cc == ccend && stackptr == stacktop);
- delayed_mem_copy_finish(&status);
- }
- static SLJIT_INLINE PCRE2_SPTR set_then_offsets(compiler_common *common, PCRE2_SPTR cc, sljit_u8 *current_offset)
- {
- PCRE2_SPTR end = bracketend(cc);
- BOOL has_alternatives = cc[GET(cc, 1)] == OP_ALT;
- /* Assert captures then. */
- if (*cc >= OP_ASSERT && *cc <= OP_ASSERTBACK_NOT)
- current_offset = NULL;
- /* Conditional block does not. */
- if (*cc == OP_COND || *cc == OP_SCOND)
- has_alternatives = FALSE;
- cc = next_opcode(common, cc);
- if (has_alternatives)
- current_offset = common->then_offsets + (cc - common->start);
- while (cc < end)
- {
- if ((*cc >= OP_ASSERT && *cc <= OP_ASSERTBACK_NOT) || (*cc >= OP_ONCE && *cc <= OP_SCOND))
- cc = set_then_offsets(common, cc, current_offset);
- else
- {
- if (*cc == OP_ALT && has_alternatives)
- current_offset = common->then_offsets + (cc + 1 + LINK_SIZE - common->start);
- if (*cc >= OP_THEN && *cc <= OP_THEN_ARG && current_offset != NULL)
- *current_offset = 1;
- cc = next_opcode(common, cc);
- }
- }
- return end;
- }
- #undef CASE_ITERATOR_PRIVATE_DATA_1
- #undef CASE_ITERATOR_PRIVATE_DATA_2A
- #undef CASE_ITERATOR_PRIVATE_DATA_2B
- #undef CASE_ITERATOR_TYPE_PRIVATE_DATA_1
- #undef CASE_ITERATOR_TYPE_PRIVATE_DATA_2A
- #undef CASE_ITERATOR_TYPE_PRIVATE_DATA_2B
- static SLJIT_INLINE BOOL is_powerof2(unsigned int value)
- {
- return (value & (value - 1)) == 0;
- }
- static SLJIT_INLINE void set_jumps(jump_list *list, struct sljit_label *label)
- {
- while (list)
- {
- /* sljit_set_label is clever enough to do nothing
- if either the jump or the label is NULL. */
- SET_LABEL(list->jump, label);
- list = list->next;
- }
- }
- static SLJIT_INLINE void add_jump(struct sljit_compiler *compiler, jump_list **list, struct sljit_jump *jump)
- {
- jump_list *list_item = sljit_alloc_memory(compiler, sizeof(jump_list));
- if (list_item)
- {
- list_item->next = *list;
- list_item->jump = jump;
- *list = list_item;
- }
- }
- static void add_stub(compiler_common *common, struct sljit_jump *start)
- {
- DEFINE_COMPILER;
- stub_list *list_item = sljit_alloc_memory(compiler, sizeof(stub_list));
- if (list_item)
- {
- list_item->start = start;
- list_item->quit = LABEL();
- list_item->next = common->stubs;
- common->stubs = list_item;
- }
- }
- static void flush_stubs(compiler_common *common)
- {
- DEFINE_COMPILER;
- stub_list *list_item = common->stubs;
- while (list_item)
- {
- JUMPHERE(list_item->start);
- add_jump(compiler, &common->stackalloc, JUMP(SLJIT_FAST_CALL));
- JUMPTO(SLJIT_JUMP, list_item->quit);
- list_item = list_item->next;
- }
- common->stubs = NULL;
- }
- static void add_label_addr(compiler_common *common, sljit_uw *update_addr)
- {
- DEFINE_COMPILER;
- label_addr_list *label_addr;
- label_addr = sljit_alloc_memory(compiler, sizeof(label_addr_list));
- if (label_addr == NULL)
- return;
- label_addr->label = LABEL();
- label_addr->update_addr = update_addr;
- label_addr->next = common->label_addrs;
- common->label_addrs = label_addr;
- }
- static SLJIT_INLINE void count_match(compiler_common *common)
- {
- DEFINE_COMPILER;
- OP2(SLJIT_SUB | SLJIT_SET_Z, COUNT_MATCH, 0, COUNT_MATCH, 0, SLJIT_IMM, 1);
- add_jump(compiler, &common->calllimit, JUMP(SLJIT_ZERO));
- }
- static SLJIT_INLINE void allocate_stack(compiler_common *common, int size)
- {
- /* May destroy all locals and registers except TMP2. */
- DEFINE_COMPILER;
- SLJIT_ASSERT(size > 0);
- OP2(SLJIT_SUB, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, size * sizeof(sljit_sw));
- #ifdef DESTROY_REGISTERS
- OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 12345);
- OP1(SLJIT_MOV, TMP3, 0, TMP1, 0);
- OP1(SLJIT_MOV, RETURN_ADDR, 0, TMP1, 0);
- OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), LOCALS0, TMP1, 0);
- OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), LOCALS1, TMP1, 0);
- #endif
- add_stub(common, CMP(SLJIT_LESS, STACK_TOP, 0, STACK_LIMIT, 0));
- }
- static SLJIT_INLINE void free_stack(compiler_common *common, int size)
- {
- DEFINE_COMPILER;
- SLJIT_ASSERT(size > 0);
- OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, size * sizeof(sljit_sw));
- }
- static sljit_uw * allocate_read_only_data(compiler_common *common, sljit_uw size)
- {
- DEFINE_COMPILER;
- sljit_uw *result;
- if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
- return NULL;
- result = (sljit_uw *)SLJIT_MALLOC(size + sizeof(sljit_uw), compiler->allocator_data);
- if (SLJIT_UNLIKELY(result == NULL))
- {
- sljit_set_compiler_memory_error(compiler);
- return NULL;
- }
- *(void**)result = common->read_only_data_head;
- common->read_only_data_head = (void *)result;
- return result + 1;
- }
- static SLJIT_INLINE void reset_ovector(compiler_common *common, int length)
- {
- DEFINE_COMPILER;
- struct sljit_label *loop;
- sljit_s32 i;
- /* At this point we can freely use all temporary registers. */
- SLJIT_ASSERT(length > 1);
- /* TMP1 returns with begin - 1. */
- OP2(SLJIT_SUB, SLJIT_R0, 0, SLJIT_MEM1(SLJIT_S0), SLJIT_OFFSETOF(jit_arguments, begin), SLJIT_IMM, IN_UCHARS(1));
- if (length < 8)
- {
- for (i = 1; i < length; i++)
- OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(i), SLJIT_R0, 0);
- }
- else
- {
- if (sljit_emit_mem(compiler, SLJIT_MOV | SLJIT_MEM_SUPP | SLJIT_MEM_STORE | SLJIT_MEM_PRE, SLJIT_R0, SLJIT_MEM1(SLJIT_R1), sizeof(sljit_sw)) == SLJIT_SUCCESS)
- {
- GET_LOCAL_BASE(SLJIT_R1, 0, OVECTOR_START);
- OP1(SLJIT_MOV, SLJIT_R2, 0, SLJIT_IMM, length - 1);
- loop = LABEL();
- sljit_emit_mem(compiler, SLJIT_MOV | SLJIT_MEM_STORE | SLJIT_MEM_PRE, SLJIT_R0, SLJIT_MEM1(SLJIT_R1), sizeof(sljit_sw));
- OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_R2, 0, SLJIT_R2, 0, SLJIT_IMM, 1);
- JUMPTO(SLJIT_NOT_ZERO, loop);
- }
- else
- {
- GET_LOCAL_BASE(SLJIT_R1, 0, OVECTOR_START + sizeof(sljit_sw));
- OP1(SLJIT_MOV, SLJIT_R2, 0, SLJIT_IMM, length - 1);
- loop = LABEL();
- OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_R1), 0, SLJIT_R0, 0);
- OP2(SLJIT_ADD, SLJIT_R1, 0, SLJIT_R1, 0, SLJIT_IMM, sizeof(sljit_sw));
- OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_R2, 0, SLJIT_R2, 0, SLJIT_IMM, 1);
- JUMPTO(SLJIT_NOT_ZERO, loop);
- }
- }
- }
- static SLJIT_INLINE void reset_fast_fail(compiler_common *common)
- {
- DEFINE_COMPILER;
- sljit_s32 i;
- SLJIT_ASSERT(common->fast_fail_start_ptr < common->fast_fail_end_ptr);
- OP2(SLJIT_SUB, TMP1, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
- for (i = common->fast_fail_start_ptr; i < common->fast_fail_end_ptr; i += sizeof(sljit_sw))
- OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), i, TMP1, 0);
- }
- static SLJIT_INLINE void do_reset_match(compiler_common *common, int length)
- {
- DEFINE_COMPILER;
- struct sljit_label *loop;
- int i;
- SLJIT_ASSERT(length > 1);
- /* OVECTOR(1) contains the "string begin - 1" constant. */
- if (length > 2)
- OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(1));
- if (length < 8)
- {
- for (i = 2; i < length; i++)
- OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(i), TMP1, 0);
- }
- else
- {
- if (sljit_emit_mem(compiler, SLJIT_MOV | SLJIT_MEM_SUPP | SLJIT_MEM_STORE | SLJIT_MEM_PRE, TMP1, SLJIT_MEM1(TMP2), sizeof(sljit_sw)) == SLJIT_SUCCESS)
- {
- GET_LOCAL_BASE(TMP2, 0, OVECTOR_START + sizeof(sljit_sw));
- OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_IMM, length - 2);
- loop = LABEL();
- sljit_emit_mem(compiler, SLJIT_MOV | SLJIT_MEM_STORE | SLJIT_MEM_PRE, TMP1, SLJIT_MEM1(TMP2), sizeof(sljit_sw));
- OP2(SLJIT_SUB | SLJIT_SET_Z, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, 1);
- JUMPTO(SLJIT_NOT_ZERO, loop);
- }
- else
- {
- GET_LOCAL_BASE(TMP2, 0, OVECTOR_START + 2 * sizeof(sljit_sw));
- OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_IMM, length - 2);
- loop = LABEL();
- OP1(SLJIT_MOV, SLJIT_MEM1(TMP2), 0, TMP1, 0);
- OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, SLJIT_IMM, sizeof(sljit_sw));
- OP2(SLJIT_SUB | SLJIT_SET_Z, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, 1);
- JUMPTO(SLJIT_NOT_ZERO, loop);
- }
- }
- OP1(SLJIT_MOV, STACK_TOP, 0, ARGUMENTS, 0);
- if (common->mark_ptr != 0)
- OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->mark_ptr, SLJIT_IMM, 0);
- if (common->control_head_ptr != 0)
- OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, SLJIT_IMM, 0);
- OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(STACK_TOP), SLJIT_OFFSETOF(jit_arguments, stack));
- OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->start_ptr);
- OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(STACK_TOP), SLJIT_OFFSETOF(struct sljit_stack, end));
- }
- static sljit_sw SLJIT_FUNC do_search_mark(sljit_sw *current, PCRE2_SPTR skip_arg)
- {
- while (current != NULL)
- {
- switch (current[1])
- {
- case type_then_trap:
- break;
- case type_mark:
- if (PRIV(strcmp)(skip_arg, (PCRE2_SPTR)current[2]) == 0)
- return current[3];
- break;
- default:
- SLJIT_UNREACHABLE();
- break;
- }
- SLJIT_ASSERT(current[0] == 0 || current < (sljit_sw*)current[0]);
- current = (sljit_sw*)current[0];
- }
- return 0;
- }
- static SLJIT_INLINE void copy_ovector(compiler_common *common, int topbracket)
- {
- DEFINE_COMPILER;
- struct sljit_label *loop;
- BOOL has_pre;
- /* At this point we can freely use all registers. */
- OP1(SLJIT_MOV, SLJIT_S2, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(1));
- OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(1), STR_PTR, 0);
- OP1(SLJIT_MOV, SLJIT_R0, 0, ARGUMENTS, 0);
- OP1(SLJIT_MOV, SLJIT_S0, 0, SLJIT_MEM1(SLJIT_SP), common->start_ptr);
- if (common->mark_ptr != 0)
- OP1(SLJIT_MOV, SLJIT_R2, 0, SLJIT_MEM1(SLJIT_SP), common->mark_ptr);
- OP1(SLJIT_MOV_U32, SLJIT_R1, 0, SLJIT_MEM1(SLJIT_R0), SLJIT_OFFSETOF(jit_arguments, oveccount));
- OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_R0), SLJIT_OFFSETOF(jit_arguments, startchar_ptr), SLJIT_S0, 0);
- if (common->mark_ptr != 0)
- OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_R0), SLJIT_OFFSETOF(jit_arguments, mark_ptr), SLJIT_R2, 0);
- OP2(SLJIT_ADD, SLJIT_R2, 0, SLJIT_MEM1(SLJIT_R0), SLJIT_OFFSETOF(jit_arguments, match_data),
- SLJIT_IMM, SLJIT_OFFSETOF(pcre2_match_data, ovector) - sizeof(PCRE2_SIZE));
- has_pre = sljit_emit_mem(compiler, SLJIT_MOV | SLJIT_MEM_SUPP | SLJIT_MEM_PRE, SLJIT_S1, SLJIT_MEM1(SLJIT_S0), sizeof(sljit_sw)) == SLJIT_SUCCESS;
- GET_LOCAL_BASE(SLJIT_S0, 0, OVECTOR_START - (has_pre ? sizeof(sljit_sw) : 0));
- OP1(SLJIT_MOV, SLJIT_R0, 0, SLJIT_MEM1(SLJIT_R0), SLJIT_OFFSETOF(jit_arguments, begin));
- loop = LABEL();
- if (has_pre)
- sljit_emit_mem(compiler, SLJIT_MOV | SLJIT_MEM_PRE, SLJIT_S1, SLJIT_MEM1(SLJIT_S0), sizeof(sljit_sw));
- else
- {
- OP1(SLJIT_MOV, SLJIT_S1, 0, SLJIT_MEM1(SLJIT_S0), 0);
- OP2(SLJIT_ADD, SLJIT_S0, 0, SLJIT_S0, 0, SLJIT_IMM, sizeof(sljit_sw));
- }
- OP2(SLJIT_ADD, SLJIT_R2, 0, SLJIT_R2, 0, SLJIT_IMM, sizeof(PCRE2_SIZE));
- OP2(SLJIT_SUB, SLJIT_S1, 0, SLJIT_S1, 0, SLJIT_R0, 0);
- /* Copy the integer value to the output buffer */
- #if PCRE2_CODE_UNIT_WIDTH == 16 || PCRE2_CODE_UNIT_WIDTH == 32
- OP2(SLJIT_ASHR, SLJIT_S1, 0, SLJIT_S1, 0, SLJIT_IMM, UCHAR_SHIFT);
- #endif
- SLJIT_ASSERT(sizeof(PCRE2_SIZE) == 4 || sizeof(PCRE2_SIZE) == 8);
- OP1(((sizeof(PCRE2_SIZE) == 4) ? SLJIT_MOV_U32 : SLJIT_MOV), SLJIT_MEM1(SLJIT_R2), 0, SLJIT_S1, 0);
- OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_R1, 0, SLJIT_R1, 0, SLJIT_IMM, 1);
- JUMPTO(SLJIT_NOT_ZERO, loop);
- /* Calculate the return value, which is the maximum ovector value. */
- if (topbracket > 1)
- {
- if (sljit_emit_mem(compiler, SLJIT_MOV | SLJIT_MEM_SUPP | SLJIT_MEM_PRE, SLJIT_R2, SLJIT_MEM1(SLJIT_R0), -(2 * (sljit_sw)sizeof(sljit_sw))) == SLJIT_SUCCESS)
- {
- GET_LOCAL_BASE(SLJIT_R0, 0, OVECTOR_START + topbracket * 2 * sizeof(sljit_sw));
- OP1(SLJIT_MOV, SLJIT_R1, 0, SLJIT_IMM, topbracket + 1);
- /* OVECTOR(0) is never equal to SLJIT_S2. */
- loop = LABEL();
- sljit_emit_mem(compiler, SLJIT_MOV | SLJIT_MEM_PRE, SLJIT_R2, SLJIT_MEM1(SLJIT_R0), -(2 * (sljit_sw)sizeof(sljit_sw)));
- OP2(SLJIT_SUB, SLJIT_R1, 0, SLJIT_R1, 0, SLJIT_IMM, 1);
- CMPTO(SLJIT_EQUAL, SLJIT_R2, 0, SLJIT_S2, 0, loop);
- OP1(SLJIT_MOV, SLJIT_RETURN_REG, 0, SLJIT_R1, 0);
- }
- else
- {
- GET_LOCAL_BASE(SLJIT_R0, 0, OVECTOR_START + (topbracket - 1) * 2 * sizeof(sljit_sw));
- OP1(SLJIT_MOV, SLJIT_R1, 0, SLJIT_IMM, topbracket + 1);
- /* OVECTOR(0) is never equal to SLJIT_S2. */
- loop = LABEL();
- OP1(SLJIT_MOV, SLJIT_R2, 0, SLJIT_MEM1(SLJIT_R0), 0);
- OP2(SLJIT_SUB, SLJIT_R0, 0, SLJIT_R0, 0, SLJIT_IMM, 2 * (sljit_sw)sizeof(sljit_sw));
- OP2(SLJIT_SUB, SLJIT_R1, 0, SLJIT_R1, 0, SLJIT_IMM, 1);
- CMPTO(SLJIT_EQUAL, SLJIT_R2, 0, SLJIT_S2, 0, loop);
- OP1(SLJIT_MOV, SLJIT_RETURN_REG, 0, SLJIT_R1, 0);
- }
- }
- else
- OP1(SLJIT_MOV, SLJIT_RETURN_REG, 0, SLJIT_IMM, 1);
- }
- static SLJIT_INLINE void return_with_partial_match(compiler_common *common, struct sljit_label *quit)
- {
- DEFINE_COMPILER;
- sljit_s32 mov_opcode;
- SLJIT_COMPILE_ASSERT(STR_END == SLJIT_S0, str_end_must_be_saved_reg0);
- SLJIT_ASSERT(common->start_used_ptr != 0 && common->start_ptr != 0
- && (common->mode == PCRE2_JIT_PARTIAL_SOFT ? common->hit_start != 0 : common->hit_start == 0));
- OP1(SLJIT_MOV, SLJIT_R1, 0, ARGUMENTS, 0);
- OP1(SLJIT_MOV, SLJIT_R2, 0, SLJIT_MEM1(SLJIT_SP),
- common->mode == PCRE2_JIT_PARTIAL_SOFT ? common->hit_start : common->start_ptr);
- OP1(SLJIT_MOV, SLJIT_RETURN_REG, 0, SLJIT_IMM, PCRE2_ERROR_PARTIAL);
- /* Store match begin and end. */
- OP1(SLJIT_MOV, SLJIT_S1, 0, SLJIT_MEM1(SLJIT_R1), SLJIT_OFFSETOF(jit_arguments, begin));
- OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_R1), SLJIT_OFFSETOF(jit_arguments, startchar_ptr), SLJIT_R2, 0);
- OP1(SLJIT_MOV, SLJIT_R1, 0, SLJIT_MEM1(SLJIT_R1), SLJIT_OFFSETOF(jit_arguments, match_data));
- mov_opcode = (sizeof(PCRE2_SIZE) == 4) ? SLJIT_MOV_U32 : SLJIT_MOV;
- OP2(SLJIT_SUB, SLJIT_R2, 0, SLJIT_R2, 0, SLJIT_S1, 0);
- #if PCRE2_CODE_UNIT_WIDTH == 16 || PCRE2_CODE_UNIT_WIDTH == 32
- OP2(SLJIT_ASHR, SLJIT_R2, 0, SLJIT_R2, 0, SLJIT_IMM, UCHAR_SHIFT);
- #endif
- OP1(mov_opcode, SLJIT_MEM1(SLJIT_R1), SLJIT_OFFSETOF(pcre2_match_data, ovector), SLJIT_R2, 0);
- OP2(SLJIT_SUB, STR_END, 0, STR_END, 0, SLJIT_S1, 0);
- #if PCRE2_CODE_UNIT_WIDTH == 16 || PCRE2_CODE_UNIT_WIDTH == 32
- OP2(SLJIT_ASHR, STR_END, 0, STR_END, 0, SLJIT_IMM, UCHAR_SHIFT);
- #endif
- OP1(mov_opcode, SLJIT_MEM1(SLJIT_R1), SLJIT_OFFSETOF(pcre2_match_data, ovector) + sizeof(PCRE2_SIZE), STR_END, 0);
- JUMPTO(SLJIT_JUMP, quit);
- }
- static SLJIT_INLINE void check_start_used_ptr(compiler_common *common)
- {
- /* May destroy TMP1. */
- DEFINE_COMPILER;
- struct sljit_jump *jump;
- if (common->mode == PCRE2_JIT_PARTIAL_SOFT)
- {
- /* The value of -1 must be kept for start_used_ptr! */
- OP2(SLJIT_ADD, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->start_used_ptr, SLJIT_IMM, 1);
- /* Jumps if start_used_ptr < STR_PTR, or start_used_ptr == -1. Although overwriting
- is not necessary if start_used_ptr == STR_PTR, it does not hurt as well. */
- jump = CMP(SLJIT_LESS_EQUAL, TMP1, 0, STR_PTR, 0);
- OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->start_used_ptr, STR_PTR, 0);
- JUMPHERE(jump);
- }
- else if (common->mode == PCRE2_JIT_PARTIAL_HARD)
- {
- jump = CMP(SLJIT_LESS_EQUAL, SLJIT_MEM1(SLJIT_SP), common->start_used_ptr, STR_PTR, 0);
- OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->start_used_ptr, STR_PTR, 0);
- JUMPHERE(jump);
- }
- }
- static SLJIT_INLINE BOOL char_has_othercase(compiler_common *common, PCRE2_SPTR cc)
- {
- /* Detects if the character has an othercase. */
- unsigned int c;
- #ifdef SUPPORT_UNICODE
- if (common->utf)
- {
- GETCHAR(c, cc);
- if (c > 127)
- {
- return c != UCD_OTHERCASE(c);
- }
- #if PCRE2_CODE_UNIT_WIDTH != 8
- return common->fcc[c] != c;
- #endif
- }
- else
- #endif
- c = *cc;
- return MAX_255(c) ? common->fcc[c] != c : FALSE;
- }
- static SLJIT_INLINE unsigned int char_othercase(compiler_common *common, unsigned int c)
- {
- /* Returns with the othercase. */
- #ifdef SUPPORT_UNICODE
- if (common->utf && c > 127)
- {
- return UCD_OTHERCASE(c);
- }
- #endif
- return TABLE_GET(c, common->fcc, c);
- }
- static unsigned int char_get_othercase_bit(compiler_common *common, PCRE2_SPTR cc)
- {
- /* Detects if the character and its othercase has only 1 bit difference. */
- unsigned int c, oc, bit;
- #if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 8
- int n;
- #endif
- #ifdef SUPPORT_UNICODE
- if (common->utf)
- {
- GETCHAR(c, cc);
- if (c <= 127)
- oc = common->fcc[c];
- else
- {
- oc = UCD_OTHERCASE(c);
- }
- }
- else
- {
- c = *cc;
- oc = TABLE_GET(c, common->fcc, c);
- }
- #else
- c = *cc;
- oc = TABLE_GET(c, common->fcc, c);
- #endif
- SLJIT_ASSERT(c != oc);
- bit = c ^ oc;
- /* Optimized for English alphabet. */
- if (c <= 127 && bit == 0x20)
- return (0 << 8) | 0x20;
- /* Since c != oc, they must have at least 1 bit difference. */
- if (!is_powerof2(bit))
- return 0;
- #if PCRE2_CODE_UNIT_WIDTH == 8
- #ifdef SUPPORT_UNICODE
- if (common->utf && c > 127)
- {
- n = GET_EXTRALEN(*cc);
- while ((bit & 0x3f) == 0)
- {
- n--;
- bit >>= 6;
- }
- return (n << 8) | bit;
- }
- #endif /* SUPPORT_UNICODE */
- return (0 << 8) | bit;
- #elif PCRE2_CODE_UNIT_WIDTH == 16 || PCRE2_CODE_UNIT_WIDTH == 32
- #ifdef SUPPORT_UNICODE
- if (common->utf && c > 65535)
- {
- if (bit >= (1 << 10))
- bit >>= 10;
- else
- return (bit < 256) ? ((2 << 8) | bit) : ((3 << 8) | (bit >> 8));
- }
- #endif /* SUPPORT_UNICODE */
- return (bit < 256) ? ((0 << 8) | bit) : ((1 << 8) | (bit >> 8));
- #endif /* PCRE2_CODE_UNIT_WIDTH == [8|16|32] */
- }
- static void check_partial(compiler_common *common, BOOL force)
- {
- /* Checks whether a partial matching is occurred. Does not modify registers. */
- DEFINE_COMPILER;
- struct sljit_jump *jump = NULL;
- SLJIT_ASSERT(!force || common->mode != PCRE2_JIT_COMPLETE);
- if (common->mode == PCRE2_JIT_COMPLETE)
- return;
- if (!force)
- jump = CMP(SLJIT_GREATER_EQUAL, SLJIT_MEM1(SLJIT_SP), common->start_used_ptr, STR_PTR, 0);
- else if (common->mode == PCRE2_JIT_PARTIAL_SOFT)
- jump = CMP(SLJIT_EQUAL, SLJIT_MEM1(SLJIT_SP), common->start_used_ptr, SLJIT_IMM, -1);
- if (common->mode == PCRE2_JIT_PARTIAL_SOFT)
- OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->hit_start, SLJIT_IMM, 0);
- else
- {
- if (common->partialmatchlabel != NULL)
- JUMPTO(SLJIT_JUMP, common->partialmatchlabel);
- else
- add_jump(compiler, &common->partialmatch, JUMP(SLJIT_JUMP));
- }
- if (jump != NULL)
- JUMPHERE(jump);
- }
- static void check_str_end(compiler_common *common, jump_list **end_reached)
- {
- /* Does not affect registers. Usually used in a tight spot. */
- DEFINE_COMPILER;
- struct sljit_jump *jump;
- if (common->mode == PCRE2_JIT_COMPLETE)
- {
- add_jump(compiler, end_reached, CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0));
- return;
- }
- jump = CMP(SLJIT_LESS, STR_PTR, 0, STR_END, 0);
- if (common->mode == PCRE2_JIT_PARTIAL_SOFT)
- {
- add_jump(compiler, end_reached, CMP(SLJIT_GREATER_EQUAL, SLJIT_MEM1(SLJIT_SP), common->start_used_ptr, STR_PTR, 0));
- OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->hit_start, SLJIT_IMM, 0);
- add_jump(compiler, end_reached, JUMP(SLJIT_JUMP));
- }
- else
- {
- add_jump(compiler, end_reached, CMP(SLJIT_GREATER_EQUAL, SLJIT_MEM1(SLJIT_SP), common->start_used_ptr, STR_PTR, 0));
- if (common->partialmatchlabel != NULL)
- JUMPTO(SLJIT_JUMP, common->partialmatchlabel);
- else
- add_jump(compiler, &common->partialmatch, JUMP(SLJIT_JUMP));
- }
- JUMPHERE(jump);
- }
- static void detect_partial_match(compiler_common *common, jump_list **backtracks)
- {
- DEFINE_COMPILER;
- struct sljit_jump *jump;
- if (common->mode == PCRE2_JIT_COMPLETE)
- {
- add_jump(compiler, backtracks, CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0));
- return;
- }
- /* Partial matching mode. */
- jump = CMP(SLJIT_LESS, STR_PTR, 0, STR_END, 0);
- add_jump(compiler, backtracks, CMP(SLJIT_GREATER_EQUAL, SLJIT_MEM1(SLJIT_SP), common->start_used_ptr, STR_PTR, 0));
- if (common->mode == PCRE2_JIT_PARTIAL_SOFT)
- {
- OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->hit_start, SLJIT_IMM, 0);
- add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));
- }
- else
- {
- if (common->partialmatchlabel != NULL)
- JUMPTO(SLJIT_JUMP, common->partialmatchlabel);
- else
- add_jump(compiler, &common->partialmatch, JUMP(SLJIT_JUMP));
- }
- JUMPHERE(jump);
- }
- static void peek_char(compiler_common *common, sljit_u32 max)
- {
- /* Reads the character into TMP1, keeps STR_PTR.
- Does not check STR_END. TMP2 Destroyed. */
- DEFINE_COMPILER;
- #if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
- struct sljit_jump *jump;
- #endif
- SLJIT_UNUSED_ARG(max);
- OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
- #if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 8
- if (common->utf)
- {
- if (max < 128) return;
- jump = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0xc0);
- OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
- add_jump(compiler, &common->utfreadchar, JUMP(SLJIT_FAST_CALL));
- OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
- JUMPHERE(jump);
- }
- #endif /* SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32 */
- #if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 16
- if (common->utf)
- {
- if (max < 0xd800) return;
- OP2(SLJIT_SUB, TMP2, 0, TMP1, 0, SLJIT_IMM, 0xd800);
- jump = CMP(SLJIT_GREATER, TMP2, 0, SLJIT_IMM, 0xdc00 - 0xd800 - 1);
- /* TMP2 contains the high surrogate. */
- OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
- OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x40);
- OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 10);
- OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x3ff);
- OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
- JUMPHERE(jump);
- }
- #endif
- }
- #if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 8
- static BOOL is_char7_bitset(const sljit_u8 *bitset, BOOL nclass)
- {
- /* Tells whether the character codes below 128 are enough
- to determine a match. */
- const sljit_u8 value = nclass ? 0xff : 0;
- const sljit_u8 *end = bitset + 32;
- bitset += 16;
- do
- {
- if (*bitset++ != value)
- return FALSE;
- }
- while (bitset < end);
- return TRUE;
- }
- static void read_char7_type(compiler_common *common, BOOL full_read)
- {
- /* Reads the precise character type of a character into TMP1, if the character
- is less than 128. Otherwise it returns with zero. Does not check STR_END. The
- full_read argument tells whether characters above max are accepted or not. */
- DEFINE_COMPILER;
- struct sljit_jump *jump;
- SLJIT_ASSERT(common->utf);
- OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), 0);
- OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
- OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(TMP2), common->ctypes);
- if (full_read)
- {
- jump = CMP(SLJIT_LESS, TMP2, 0, SLJIT_IMM, 0xc0);
- OP1(SLJIT_MOV_U8, TMP2, 0, SLJIT_MEM1(TMP2), (sljit_sw)PRIV(utf8_table4) - 0xc0);
- OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
- JUMPHERE(jump);
- }
- }
- #endif /* SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 8 */
- static void read_char_range(compiler_common *common, sljit_u32 min, sljit_u32 max, BOOL update_str_ptr)
- {
- /* Reads the precise value of a character into TMP1, if the character is
- between min and max (c >= min && c <= max). Otherwise it returns with a value
- outside the range. Does not check STR_END. */
- DEFINE_COMPILER;
- #if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
- struct sljit_jump *jump;
- #endif
- #if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 8
- struct sljit_jump *jump2;
- #endif
- SLJIT_UNUSED_ARG(update_str_ptr);
- SLJIT_UNUSED_ARG(min);
- SLJIT_UNUSED_ARG(max);
- SLJIT_ASSERT(min <= max);
- OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
- OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
- #if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 8
- if (common->utf)
- {
- if (max < 128 && !update_str_ptr) return;
- jump = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0xc0);
- if (min >= 0x10000)
- {
- OP2(SLJIT_SUB, TMP2, 0, TMP1, 0, SLJIT_IMM, 0xf0);
- if (update_str_ptr)
- OP1(SLJIT_MOV_U8, RETURN_ADDR, 0, SLJIT_MEM1(TMP1), (sljit_sw)PRIV(utf8_table4) - 0xc0);
- OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
- jump2 = CMP(SLJIT_GREATER, TMP2, 0, SLJIT_IMM, 0x7);
- OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 6);
- OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x3f);
- OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
- OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
- OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);
- OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
- OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
- OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(2));
- if (!update_str_ptr)
- OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(3));
- OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);
- OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
- OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
- JUMPHERE(jump2);
- if (update_str_ptr)
- OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, RETURN_ADDR, 0);
- }
- else if (min >= 0x800 && max <= 0xffff)
- {
- OP2(SLJIT_SUB, TMP2, 0, TMP1, 0, SLJIT_IMM, 0xe0);
- if (update_str_ptr)
- OP1(SLJIT_MOV_U8, RETURN_ADDR, 0, SLJIT_MEM1(TMP1), (sljit_sw)PRIV(utf8_table4) - 0xc0);
- OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
- jump2 = CMP(SLJIT_GREATER, TMP2, 0, SLJIT_IMM, 0xf);
- OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 6);
- OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x3f);
- OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
- OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
- if (!update_str_ptr)
- OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));
- OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);
- OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
- OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
- JUMPHERE(jump2);
- if (update_str_ptr)
- OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, RETURN_ADDR, 0);
- }
- else if (max >= 0x800)
- add_jump(compiler, (max < 0x10000) ? &common->utfreadchar16 : &common->utfreadchar, JUMP(SLJIT_FAST_CALL));
- else if (max < 128)
- {
- OP1(SLJIT_MOV_U8, TMP2, 0, SLJIT_MEM1(TMP1), (sljit_sw)PRIV(utf8_table4) - 0xc0);
- OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
- }
- else
- {
- OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
- if (!update_str_ptr)
- OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
- else
- OP1(SLJIT_MOV_U8, RETURN_ADDR, 0, SLJIT_MEM1(TMP1), (sljit_sw)PRIV(utf8_table4) - 0xc0);
- OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x3f);
- OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);
- OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
- OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
- if (update_str_ptr)
- OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, RETURN_ADDR, 0);
- }
- JUMPHERE(jump);
- }
- #endif
- #if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 16
- if (common->utf)
- {
- if (max >= 0x10000)
- {
- OP2(SLJIT_SUB, TMP2, 0, TMP1, 0, SLJIT_IMM, 0xd800);
- jump = CMP(SLJIT_GREATER, TMP2, 0, SLJIT_IMM, 0xdc00 - 0xd800 - 1);
- /* TMP2 contains the high surrogate. */
- OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
- OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x40);
- OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 10);
- OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
- OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x3ff);
- OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
- JUMPHERE(jump);
- return;
- }
- if (max < 0xd800 && !update_str_ptr) return;
- /* Skip low surrogate if necessary. */
- OP2(SLJIT_SUB, TMP2, 0, TMP1, 0, SLJIT_IMM, 0xd800);
- jump = CMP(SLJIT_GREATER, TMP2, 0, SLJIT_IMM, 0xdc00 - 0xd800 - 1);
- if (update_str_ptr)
- OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
- if (max >= 0xd800)
- OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0x10000);
- JUMPHERE(jump);
- }
- #endif
- }
- static SLJIT_INLINE void read_char(compiler_common *common)
- {
- read_char_range(common, 0, READ_CHAR_MAX, TRUE);
- }
- static void read_char8_type(compiler_common *common, BOOL update_str_ptr)
- {
- /* Reads the character type into TMP1, updates STR_PTR. Does not check STR_END. */
- DEFINE_COMPILER;
- #if defined SUPPORT_UNICODE || PCRE2_CODE_UNIT_WIDTH != 8
- struct sljit_jump *jump;
- #endif
- #if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 8
- struct sljit_jump *jump2;
- #endif
- SLJIT_UNUSED_ARG(update_str_ptr);
- OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), 0);
- OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
- #if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 8
- if (common->utf)
- {
- /* This can be an extra read in some situations, but hopefully
- it is needed in most cases. */
- OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(TMP2), common->ctypes);
- jump = CMP(SLJIT_LESS, TMP2, 0, SLJIT_IMM, 0xc0);
- if (!update_str_ptr)
- {
- OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
- OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
- OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
- OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 6);
- OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x3f);
- OP2(SLJIT_OR, TMP2, 0, TMP2, 0, TMP1, 0);
- OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0);
- jump2 = CMP(SLJIT_GREATER, TMP2, 0, SLJIT_IMM, 255);
- OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(TMP2), common->ctypes);
- JUMPHERE(jump2);
- }
- else
- add_jump(compiler, &common->utfreadtype8, JUMP(SLJIT_FAST_CALL));
- JUMPHERE(jump);
- return;
- }
- #endif /* SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 8 */
- #if PCRE2_CODE_UNIT_WIDTH != 8
- /* The ctypes array contains only 256 values. */
- OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0);
- jump = CMP(SLJIT_GREATER, TMP2, 0, SLJIT_IMM, 255);
- #endif
- OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(TMP2), common->ctypes);
- #if PCRE2_CODE_UNIT_WIDTH != 8
- JUMPHERE(jump);
- #endif
- #if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 16
- if (common->utf && update_str_ptr)
- {
- /* Skip low surrogate if necessary. */
- OP2(SLJIT_SUB, TMP2, 0, TMP2, 0, SLJIT_IMM, 0xd800);
- jump = CMP(SLJIT_GREATER, TMP2, 0, SLJIT_IMM, 0xdc00 - 0xd800 - 1);
- OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
- JUMPHERE(jump);
- }
- #endif /* SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 16 */
- }
- static void skip_char_back(compiler_common *common)
- {
- /* Goes one character back. Affects STR_PTR and TMP1. Does not check begin. */
- DEFINE_COMPILER;
- #if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
- #if PCRE2_CODE_UNIT_WIDTH == 8
- struct sljit_label *label;
- if (common->utf)
- {
- label = LABEL();
- OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), -IN_UCHARS(1));
- OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
- OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xc0);
- CMPTO(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, 0x80, label);
- return;
- }
- #elif PCRE2_CODE_UNIT_WIDTH == 16
- if (common->utf)
- {
- OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), -IN_UCHARS(1));
- OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
- /* Skip low surrogate if necessary. */
- OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xfc00);
- OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0xdc00);
- OP_FLAGS(SLJIT_MOV, TMP1, 0, SLJIT_EQUAL);
- OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
- OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
- return;
- }
- #endif /* PCRE2_CODE_UNIT_WIDTH == [8|16] */
- #endif /* SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32 */
- OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
- }
- static void check_newlinechar(compiler_common *common, int nltype, jump_list **backtracks, BOOL jumpifmatch)
- {
- /* Character comes in TMP1. Checks if it is a newline. TMP2 may be destroyed. */
- DEFINE_COMPILER;
- struct sljit_jump *jump;
- if (nltype == NLTYPE_ANY)
- {
- add_jump(compiler, &common->anynewline, JUMP(SLJIT_FAST_CALL));
- sljit_set_current_flags(compiler, SLJIT_SET_Z);
- add_jump(compiler, backtracks, JUMP(jumpifmatch ? SLJIT_NOT_ZERO : SLJIT_ZERO));
- }
- else if (nltype == NLTYPE_ANYCRLF)
- {
- if (jumpifmatch)
- {
- add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_CR));
- add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_NL));
- }
- else
- {
- jump = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_CR);
- add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_NL));
- JUMPHERE(jump);
- }
- }
- else
- {
- SLJIT_ASSERT(nltype == NLTYPE_FIXED && common->newline < 256);
- add_jump(compiler, backtracks, CMP(jumpifmatch ? SLJIT_EQUAL : SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, common->newline));
- }
- }
- #ifdef SUPPORT_UNICODE
- #if PCRE2_CODE_UNIT_WIDTH == 8
- static void do_utfreadchar(compiler_common *common)
- {
- /* Fast decoding a UTF-8 character. TMP1 contains the first byte
- of the character (>= 0xc0). Return char value in TMP1, length in TMP2. */
- DEFINE_COMPILER;
- struct sljit_jump *jump;
- sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
- OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
- OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x3f);
- OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);
- OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
- OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
- /* Searching for the first zero. */
- OP2(SLJIT_AND | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x800);
- jump = JUMP(SLJIT_NOT_ZERO);
- /* Two byte sequence. */
- OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
- OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, IN_UCHARS(2));
- sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
- JUMPHERE(jump);
- OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
- OP2(SLJIT_XOR, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x800);
- OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);
- OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
- OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
- OP2(SLJIT_AND | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x10000);
- jump = JUMP(SLJIT_NOT_ZERO);
- /* Three byte sequence. */
- OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));
- OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, IN_UCHARS(3));
- sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
- /* Four byte sequence. */
- JUMPHERE(jump);
- OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(2));
- OP2(SLJIT_XOR, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x10000);
- OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);
- OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(3));
- OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
- OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
- OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, IN_UCHARS(4));
- sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
- }
- static void do_utfreadchar16(compiler_common *common)
- {
- /* Fast decoding a UTF-8 character. TMP1 contains the first byte
- of the character (>= 0xc0). Return value in TMP1. */
- DEFINE_COMPILER;
- struct sljit_jump *jump;
- sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
- OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
- OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x3f);
- OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);
- OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
- OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
- /* Searching for the first zero. */
- OP2(SLJIT_AND | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x800);
- jump = JUMP(SLJIT_NOT_ZERO);
- /* Two byte sequence. */
- OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
- sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
- JUMPHERE(jump);
- OP2(SLJIT_AND | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x400);
- OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_NOT_ZERO);
- /* This code runs only in 8 bit mode. No need to shift the value. */
- OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
- OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
- OP2(SLJIT_XOR, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x800);
- OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);
- OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
- OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
- /* Three byte sequence. */
- OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));
- sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
- }
- static void do_utfreadtype8(compiler_common *common)
- {
- /* Fast decoding a UTF-8 character type. TMP2 contains the first byte
- of the character (>= 0xc0). Return value in TMP1. */
- DEFINE_COMPILER;
- struct sljit_jump *jump;
- struct sljit_jump *compare;
- sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
- OP2(SLJIT_AND | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP2, 0, SLJIT_IMM, 0x20);
- jump = JUMP(SLJIT_NOT_ZERO);
- /* Two byte sequence. */
- OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
- OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
- OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x1f);
- /* The upper 5 bits are known at this point. */
- compare = CMP(SLJIT_GREATER, TMP2, 0, SLJIT_IMM, 0x3);
- OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 6);
- OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x3f);
- OP2(SLJIT_OR, TMP2, 0, TMP2, 0, TMP1, 0);
- OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(TMP2), common->ctypes);
- sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
- JUMPHERE(compare);
- OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0);
- sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
- /* We only have types for characters less than 256. */
- JUMPHERE(jump);
- OP1(SLJIT_MOV_U8, TMP2, 0, SLJIT_MEM1(TMP2), (sljit_sw)PRIV(utf8_table4) - 0xc0);
- OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0);
- OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
- sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
- }
- #endif /* PCRE2_CODE_UNIT_WIDTH == 8 */
- /* UCD_BLOCK_SIZE must be 128 (see the assert below). */
- #define UCD_BLOCK_MASK 127
- #define UCD_BLOCK_SHIFT 7
- static void do_getucd(compiler_common *common)
- {
- /* Search the UCD record for the character comes in TMP1.
- Returns chartype in TMP1 and UCD offset in TMP2. */
- DEFINE_COMPILER;
- #if PCRE2_CODE_UNIT_WIDTH == 32
- struct sljit_jump *jump;
- #endif
- #if defined SLJIT_DEBUG && SLJIT_DEBUG
- /* dummy_ucd_record */
- const ucd_record *record = GET_UCD(INVALID_UTF_CHAR);
- SLJIT_ASSERT(record->script == ucp_Common && record->chartype == ucp_Cn && record->gbprop == ucp_gbOther);
- SLJIT_ASSERT(record->caseset == 0 && record->other_case == 0);
- #endif
- SLJIT_ASSERT(UCD_BLOCK_SIZE == 128 && sizeof(ucd_record) == 8);
- sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
- #if PCRE2_CODE_UNIT_WIDTH == 32
- if (!common->utf)
- {
- jump = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, MAX_UTF_CODE_POINT + 1);
- OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, INVALID_UTF_CHAR);
- JUMPHERE(jump);
- }
- #endif
- OP2(SLJIT_LSHR, TMP2, 0, TMP1, 0, SLJIT_IMM, UCD_BLOCK_SHIFT);
- OP1(SLJIT_MOV_U8, TMP2, 0, SLJIT_MEM1(TMP2), (sljit_sw)PRIV(ucd_stage1));
- OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, UCD_BLOCK_MASK);
- OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, UCD_BLOCK_SHIFT);
- OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, TMP2, 0);
- OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, (sljit_sw)PRIV(ucd_stage2));
- OP1(SLJIT_MOV_U16, TMP2, 0, SLJIT_MEM2(TMP2, TMP1), 1);
- OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, (sljit_sw)PRIV(ucd_records) + SLJIT_OFFSETOF(ucd_record, chartype));
- OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM2(TMP1, TMP2), 3);
- sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
- }
- #endif /* SUPPORT_UNICODE */
- static SLJIT_INLINE struct sljit_label *mainloop_entry(compiler_common *common)
- {
- DEFINE_COMPILER;
- struct sljit_label *mainloop;
- struct sljit_label *newlinelabel = NULL;
- struct sljit_jump *start;
- struct sljit_jump *end = NULL;
- struct sljit_jump *end2 = NULL;
- #if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
- struct sljit_jump *singlechar;
- #endif
- jump_list *newline = NULL;
- sljit_u32 overall_options = common->re->overall_options;
- BOOL hascrorlf = (common->re->flags & PCRE2_HASCRORLF) != 0;
- BOOL newlinecheck = FALSE;
- BOOL readuchar = FALSE;
- if (!(hascrorlf || (overall_options & PCRE2_FIRSTLINE) != 0)
- && (common->nltype == NLTYPE_ANY || common->nltype == NLTYPE_ANYCRLF || common->newline > 255))
- newlinecheck = TRUE;
- SLJIT_ASSERT(common->abort_label == NULL);
- if ((overall_options & PCRE2_FIRSTLINE) != 0)
- {
- /* Search for the end of the first line. */
- SLJIT_ASSERT(common->match_end_ptr != 0);
- OP1(SLJIT_MOV, TMP3, 0, STR_PTR, 0);
- if (common->nltype == NLTYPE_FIXED && common->newline > 255)
- {
- mainloop = LABEL();
- OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
- end = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
- OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-1));
- OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
- CMPTO(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff, mainloop);
- CMPTO(SLJIT_NOT_EQUAL, TMP2, 0, SLJIT_IMM, common->newline & 0xff, mainloop);
- JUMPHERE(end);
- OP2(SLJIT_SUB, SLJIT_MEM1(SLJIT_SP), common->match_end_ptr, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
- }
- else
- {
- end = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
- mainloop = LABEL();
- /* Continual stores does not cause data dependency. */
- OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->match_end_ptr, STR_PTR, 0);
- read_char_range(common, common->nlmin, common->nlmax, TRUE);
- check_newlinechar(common, common->nltype, &newline, TRUE);
- CMPTO(SLJIT_LESS, STR_PTR, 0, STR_END, 0, mainloop);
- JUMPHERE(end);
- OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->match_end_ptr, STR_PTR, 0);
- set_jumps(newline, LABEL());
- }
- OP1(SLJIT_MOV, STR_PTR, 0, TMP3, 0);
- }
- else if ((overall_options & PCRE2_USE_OFFSET_LIMIT) != 0)
- {
- /* Check whether offset limit is set and valid. */
- SLJIT_ASSERT(common->match_end_ptr != 0);
- OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
- OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, offset_limit));
- OP1(SLJIT_MOV, TMP2, 0, STR_END, 0);
- end = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, (sljit_sw) PCRE2_UNSET);
- OP1(SLJIT_MOV, TMP2, 0, ARGUMENTS, 0);
- #if PCRE2_CODE_UNIT_WIDTH == 16
- OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
- #elif PCRE2_CODE_UNIT_WIDTH == 32
- OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 2);
- #endif
- OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(jit_arguments, begin));
- OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, TMP1, 0);
- end2 = CMP(SLJIT_LESS_EQUAL, TMP2, 0, STR_END, 0);
- OP1(SLJIT_MOV, TMP2, 0, STR_END, 0);
- JUMPHERE(end2);
- OP1(SLJIT_MOV, SLJIT_RETURN_REG, 0, SLJIT_IMM, PCRE2_ERROR_NOMATCH);
- add_jump(compiler, &common->abort, CMP(SLJIT_LESS, TMP2, 0, STR_PTR, 0));
- JUMPHERE(end);
- OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->match_end_ptr, TMP2, 0);
- }
- start = JUMP(SLJIT_JUMP);
- if (newlinecheck)
- {
- newlinelabel = LABEL();
- OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
- end = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
- OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
- OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, common->newline & 0xff);
- OP_FLAGS(SLJIT_MOV, TMP1, 0, SLJIT_EQUAL);
- #if PCRE2_CODE_UNIT_WIDTH == 16 || PCRE2_CODE_UNIT_WIDTH == 32
- OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, UCHAR_SHIFT);
- #endif
- OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
- end2 = JUMP(SLJIT_JUMP);
- }
- mainloop = LABEL();
- /* Increasing the STR_PTR here requires one less jump in the most common case. */
- #ifdef SUPPORT_UNICODE
- if (common->utf) readuchar = TRUE;
- #endif
- if (newlinecheck) readuchar = TRUE;
- if (readuchar)
- OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
- if (newlinecheck)
- CMPTO(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff, newlinelabel);
- OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
- #if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
- #if PCRE2_CODE_UNIT_WIDTH == 8
- if (common->utf)
- {
- singlechar = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0xc0);
- OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)PRIV(utf8_table4) - 0xc0);
- OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
- JUMPHERE(singlechar);
- }
- #elif PCRE2_CODE_UNIT_WIDTH == 16
- if (common->utf)
- {
- singlechar = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0xd800);
- OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xfc00);
- OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0xd800);
- OP_FLAGS(SLJIT_MOV, TMP1, 0, SLJIT_EQUAL);
- OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
- OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
- JUMPHERE(singlechar);
- }
- #endif /* PCRE2_CODE_UNIT_WIDTH == [8|16] */
- #endif /* SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32 */
- JUMPHERE(start);
- if (newlinecheck)
- {
- JUMPHERE(end);
- JUMPHERE(end2);
- }
- return mainloop;
- }
- static SLJIT_INLINE void add_prefix_char(PCRE2_UCHAR chr, fast_forward_char_data *chars, BOOL last)
- {
- sljit_u32 i, count = chars->count;
- if (count == 255)
- return;
- if (count == 0)
- {
- chars->count = 1;
- chars->chars[0] = chr;
- if (last)
- chars->last_count = 1;
- return;
- }
- for (i = 0; i < count; i++)
- if (chars->chars[i] == chr)
- return;
- if (count >= MAX_DIFF_CHARS)
- {
- chars->count = 255;
- return;
- }
- chars->chars[count] = chr;
- chars->count = count + 1;
- if (last)
- chars->last_count++;
- }
/* Walks the compiled pattern starting at cc and collects, for consecutive
code unit positions at the start of a match, the set of code units that may
appear there.

common     compiler state (utf flag, character tables)
cc         current position in the compiled pattern
chars      entry for the current position; advanced by one for every
           position consumed. An entry with count == 255 is "anything"
max_chars  number of positions that may still be filled in
rec_count  shared work budget, decremented once per loop iteration here and
           in every recursive call

Returns the number of positions consumed by this invocation, or 0 as soon as
the work budget is exhausted. Recursive calls are made with the same chars
pointer, so alternatives and optional items merge their sets into the same
positions; the value they return becomes the new max_chars limit. */
- static int scan_prefix(compiler_common *common, PCRE2_SPTR cc, fast_forward_char_data *chars, int max_chars, sljit_u32 *rec_count)
- {
- /* Recursive function, which scans prefix literals. */
- BOOL last, any, class, caseless;
- int len, repeat, len_save, consumed = 0;
- sljit_u32 chr; /* Any unicode character. */
- sljit_u8 *bytes, *bytes_end, byte;
- PCRE2_SPTR alternative, cc_save, oc;
/* Buffer for the other-case encoding of one character; its size is the
number of code units reserved per character for the configured width. */
- #if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 8
- PCRE2_UCHAR othercase[4];
- #elif defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 16
- PCRE2_UCHAR othercase[2];
- #else
- PCRE2_UCHAR othercase[1];
- #endif
/* How many times the next item is repeated; changed by the exact-repeat
opcodes and by class ranges, and reset to 1 after each item. */
- repeat = 1;
- while (TRUE)
- {
/* Give up (with a result of 0) once the shared budget is used up. */
- if (*rec_count == 0)
- return 0;
- (*rec_count)--;
/* Per-item flags:
   last      return after this item instead of continuing the scan
   any       the item is recorded as "anything" (count = 255)
   class     the item is a 32-byte class bitmap
   caseless  the other case of a literal must be recorded too */
- last = TRUE;
- any = FALSE;
- class = FALSE;
- caseless = FALSE;
- switch (*cc)
- {
- case OP_CHARI:
- caseless = TRUE;
- /* Fall through */
- case OP_CHAR:
- last = FALSE;
- cc++;
- break;
- case OP_SOD:
- case OP_SOM:
- case OP_SET_SOM:
- case OP_NOT_WORD_BOUNDARY:
- case OP_WORD_BOUNDARY:
- case OP_EODN:
- case OP_EOD:
- case OP_CIRC:
- case OP_CIRCM:
- case OP_DOLL:
- case OP_DOLLM:
- /* Zero width assertions. */
- cc++;
- continue;
/* Assertions consume nothing; step over them as a whole (bracketend()
presumably returns the address following the closing ket - confirm). */
- case OP_ASSERT:
- case OP_ASSERT_NOT:
- case OP_ASSERTBACK:
- case OP_ASSERTBACK_NOT:
- cc = bracketend(cc);
- continue;
/* One-or-more: only the first occurrence is recorded, and since "last"
stays TRUE the scan ends after it. */
- case OP_PLUSI:
- case OP_MINPLUSI:
- case OP_POSPLUSI:
- caseless = TRUE;
- /* Fall through */
- case OP_PLUS:
- case OP_MINPLUS:
- case OP_POSPLUS:
- cc++;
- break;
- case OP_EXACTI:
- caseless = TRUE;
- /* Fall through */
- case OP_EXACT:
- repeat = GET2(cc, 1);
- last = FALSE;
- cc += 1 + IMM2_SIZE;
- break;
/* Optional character: first scan what follows it (skipping its len code
units) into the same positions, then record the character itself below. */
- case OP_QUERYI:
- case OP_MINQUERYI:
- case OP_POSQUERYI:
- caseless = TRUE;
- /* Fall through */
- case OP_QUERY:
- case OP_MINQUERY:
- case OP_POSQUERY:
- len = 1;
- cc++;
- #ifdef SUPPORT_UNICODE
- if (common->utf && HAS_EXTRALEN(*cc)) len += GET_EXTRALEN(*cc);
- #endif
- max_chars = scan_prefix(common, cc + len, chars, max_chars, rec_count);
- if (max_chars == 0)
- return consumed;
- last = FALSE;
- break;
- case OP_KET:
- cc += 1 + LINK_SIZE;
- continue;
/* End of the alternative being scanned: follow the link stored after the
opcode. */
- case OP_ALT:
- cc += GET(cc, 1);
- continue;
/* Groups: every alternative after the first is scanned recursively into the
same positions; the loop then carries on with the first alternative. */
- case OP_ONCE:
- case OP_BRA:
- case OP_BRAPOS:
- case OP_CBRA:
- case OP_CBRAPOS:
- alternative = cc + GET(cc, 1);
- while (*alternative == OP_ALT)
- {
- max_chars = scan_prefix(common, alternative + 1 + LINK_SIZE, chars, max_chars, rec_count);
- if (max_chars == 0)
- return consumed;
- alternative += GET(alternative, 1);
- }
/* Capturing groups carry an extra IMM2_SIZE operand after the link. */
- if (*cc == OP_CBRA || *cc == OP_CBRAPOS)
- cc += IMM2_SIZE;
- cc += 1 + LINK_SIZE;
- continue;
- case OP_CLASS:
- #if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 8
- if (common->utf && !is_char7_bitset((const sljit_u8 *)(cc + 1), FALSE))
- return consumed;
- #endif
- class = TRUE;
- break;
- case OP_NCLASS:
- #if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
- if (common->utf) return consumed;
- #endif
- class = TRUE;
- break;
- #if defined SUPPORT_UNICODE || PCRE2_CODE_UNIT_WIDTH != 8
- case OP_XCLASS:
- #if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
- if (common->utf) return consumed;
- #endif
- any = TRUE;
- cc += GET(cc, 1);
- break;
- #endif
/* Character types are recorded as "anything". In 8-bit UTF mode the scan
stops instead when the corresponding ctypes bitmap fails the
is_char7_bitset() test. */
- case OP_DIGIT:
- #if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 8
- if (common->utf && !is_char7_bitset((const sljit_u8 *)common->ctypes - cbit_length + cbit_digit, FALSE))
- return consumed;
- #endif
- any = TRUE;
- cc++;
- break;
- case OP_WHITESPACE:
- #if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 8
- if (common->utf && !is_char7_bitset((const sljit_u8 *)common->ctypes - cbit_length + cbit_space, FALSE))
- return consumed;
- #endif
- any = TRUE;
- cc++;
- break;
- case OP_WORDCHAR:
- #if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 8
- if (common->utf && !is_char7_bitset((const sljit_u8 *)common->ctypes - cbit_length + cbit_word, FALSE))
- return consumed;
- #endif
- any = TRUE;
- cc++;
- break;
/* OP_NOT / OP_NOTI are followed by one code unit, hence the extra
increment before sharing the code of the other negative types. */
- case OP_NOT:
- case OP_NOTI:
- cc++;
- /* Fall through. */
- case OP_NOT_DIGIT:
- case OP_NOT_WHITESPACE:
- case OP_NOT_WORDCHAR:
- case OP_ANY:
- case OP_ALLANY:
- #if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
- if (common->utf) return consumed;
- #endif
- any = TRUE;
- cc++;
- break;
- #ifdef SUPPORT_UNICODE
- case OP_NOTPROP:
- case OP_PROP:
- #if PCRE2_CODE_UNIT_WIDTH != 32
- if (common->utf) return consumed;
- #endif
- any = TRUE;
- cc += 1 + 2;
- break;
- #endif
/* Only stores the repeat count; the character type that follows is handled
by the next loop iteration. */
- case OP_TYPEEXACT:
- repeat = GET2(cc, 1);
- cc += 1 + IMM2_SIZE;
- continue;
- case OP_NOTEXACT:
- case OP_NOTEXACTI:
- #if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
- if (common->utf) return consumed;
- #endif
- any = TRUE;
- repeat = GET2(cc, 1);
- cc += 1 + IMM2_SIZE + 1;
- break;
- default:
- return consumed;
- }
/* "Anything" items: mark repeat consecutive positions as untracked. */
- if (any)
- {
- do
- {
- chars->count = 255;
- consumed++;
- if (--max_chars == 0)
- return consumed;
- chars++;
- }
- while (--repeat > 0);
- repeat = 1;
- continue;
- }
- if (class)
- {
/* The bitmap starts right after the opcode; step cc over the opcode and
the 32 byte bitmap so that it points to a possible repeat opcode. */
- bytes = (sljit_u8*) (cc + 1);
- cc += 1 + 32 / sizeof(PCRE2_UCHAR);
- switch (*cc)
- {
/* The class may match zero times: merge what follows it into the same
positions first. */
- case OP_CRSTAR:
- case OP_CRMINSTAR:
- case OP_CRPOSSTAR:
- case OP_CRQUERY:
- case OP_CRMINQUERY:
- case OP_CRPOSQUERY:
- max_chars = scan_prefix(common, cc + 1, chars, max_chars, rec_count);
- if (max_chars == 0)
- return consumed;
- break;
- default:
- case OP_CRPLUS:
- case OP_CRMINPLUS:
- case OP_CRPOSPLUS:
- break;
/* The first operand of a range (its minimum) is the number of positions the
class is known to occupy. */
- case OP_CRRANGE:
- case OP_CRMINRANGE:
- case OP_CRPOSRANGE:
- repeat = GET2(cc, 1);
- if (repeat <= 0)
- return consumed;
- break;
- }
- do
- {
/* A class containing character 255 is recorded as "anything" (presumably
because higher code units may then match as well - TODO confirm). */
- if (bytes[31] & 0x80)
- chars->count = 255;
- else if (chars->count != 255)
- {
/* Walk the bitmap one byte at a time; chr is the character value of the
lowest bit of the current byte. */
- bytes_end = bytes + 32;
- chr = 0;
- do
- {
- byte = *bytes++;
- SLJIT_ASSERT((chr & 0x7) == 0);
- if (byte == 0)
- chr += 8;
- else
- {
- do
- {
- if ((byte & 0x1) != 0)
- add_prefix_char(chr, chars, TRUE);
- byte >>= 1;
- chr++;
- }
- while (byte != 0);
/* The inner loop stops at the highest set bit: round chr up to the first
character of the next bitmap byte. */
- chr = (chr + 7) & ~7;
- }
- }
- while (chars->count != 255 && bytes < bytes_end);
/* Rewind to the start of the bitmap for the next repetition. */
- bytes = bytes_end - 32;
- }
- consumed++;
- if (--max_chars == 0)
- return consumed;
- chars++;
- }
- while (--repeat > 0);
/* Decide whether the scan may continue after the class. With no repeat
opcode cc is left unchanged and scanning simply goes on. */
- switch (*cc)
- {
- case OP_CRSTAR:
- case OP_CRMINSTAR:
- case OP_CRPOSSTAR:
- return consumed;
- case OP_CRQUERY:
- case OP_CRMINQUERY:
- case OP_CRPOSQUERY:
- cc++;
- break;
/* Continue only when minimum and maximum are equal, i.e. the number of
occupied positions is fixed. */
- case OP_CRRANGE:
- case OP_CRMINRANGE:
- case OP_CRPOSRANGE:
- if (GET2(cc, 1) != GET2(cc, 1 + IMM2_SIZE))
- return consumed;
- cc += 1 + 2 * IMM2_SIZE;
- break;
- }
- repeat = 1;
- continue;
- }
/* Literal character: cc points to its first code unit, len is the number of
code units it occupies. */
- len = 1;
- #ifdef SUPPORT_UNICODE
- if (common->utf && HAS_EXTRALEN(*cc)) len += GET_EXTRALEN(*cc);
- #endif
- if (caseless && char_has_othercase(common, cc))
- {
- #ifdef SUPPORT_UNICODE
- if (common->utf)
- {
/* The other case is only usable when it is encoded in the same number of
code units; otherwise the scan stops here. */
- GETCHAR(chr, cc);
- if ((int)PRIV(ord2utf)(char_othercase(common, chr), othercase) != len)
- return consumed;
- }
- else
- #endif
- {
- chr = *cc;
- othercase[0] = TABLE_GET(chr, common->fcc, chr);
- }
- }
- else
- {
- caseless = FALSE;
- othercase[0] = 0; /* Stops compiler warning - PH */
- }
/* Record the code units of the character (and of its other case) repeat
times, restarting from the saved pattern position for each repetition. */
- len_save = len;
- cc_save = cc;
- while (TRUE)
- {
- oc = othercase;
- do
- {
- len--;
- consumed++;
- chr = *cc;
/* The third argument is TRUE only for the final code unit of the
character. */
- add_prefix_char(*cc, chars, len == 0);
- if (caseless)
- add_prefix_char(*oc, chars, len == 0);
- if (--max_chars == 0)
- return consumed;
- chars++;
- cc++;
- oc++;
- }
- while (len > 0);
- if (--repeat == 0)
- break;
- len = len_save;
- cc = cc_save;
- }
- repeat = 1;
- if (last)
- return consumed;
- }
- }
- #if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
- static void jumpto_if_not_utf_char_start(struct sljit_compiler *compiler, sljit_s32 reg, struct sljit_label *label)
- {
- #if PCRE2_CODE_UNIT_WIDTH == 8
- OP2(SLJIT_AND, reg, 0, reg, 0, SLJIT_IMM, 0xc0);
- CMPTO(SLJIT_EQUAL, reg, 0, SLJIT_IMM, 0x80, label);
- #elif PCRE2_CODE_UNIT_WIDTH == 16
- OP2(SLJIT_AND, reg, 0, reg, 0, SLJIT_IMM, 0xfc00);
- CMPTO(SLJIT_EQUAL, reg, 0, SLJIT_IMM, 0xdc00, label);
- #else
- #error "Unknown code width"
- #endif
- }
- #endif
- #if (defined SLJIT_CONFIG_X86 && SLJIT_CONFIG_X86) && !(defined SUPPORT_VALGRIND)
- #if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
- static struct sljit_jump *jump_if_utf_char_start(struct sljit_compiler *compiler, sljit_s32 reg)
- {
- #if PCRE2_CODE_UNIT_WIDTH == 8
- OP2(SLJIT_AND, reg, 0, reg, 0, SLJIT_IMM, 0xc0);
- return CMP(SLJIT_NOT_EQUAL, reg, 0, SLJIT_IMM, 0x80);
- #elif PCRE2_CODE_UNIT_WIDTH == 16
- OP2(SLJIT_AND, reg, 0, reg, 0, SLJIT_IMM, 0xfc00);
- return CMP(SLJIT_NOT_EQUAL, reg, 0, SLJIT_IMM, 0xdc00);
- #else
- #error "Unknown code width"
- #endif
- }
- #endif
- static sljit_s32 character_to_int32(PCRE2_UCHAR chr)
- {
- sljit_s32 value = (sljit_s32)chr;
- #if PCRE2_CODE_UNIT_WIDTH == 8
- #define SSE2_COMPARE_TYPE_INDEX 0
- return (value << 24) | (value << 16) | (value << 8) | value;
- #elif PCRE2_CODE_UNIT_WIDTH == 16
- #define SSE2_COMPARE_TYPE_INDEX 1
- return (value << 16) | value;
- #elif PCRE2_CODE_UNIT_WIDTH == 32
- #define SSE2_COMPARE_TYPE_INDEX 2
- return value;
- #else
- #error "Unsupported unit width"
- #endif
- }
- static void load_from_mem_sse2(struct sljit_compiler *compiler, sljit_s32 dst_xmm_reg, sljit_s32 src_general_reg)
- {
- #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
- sljit_u8 instruction[5];
- #else
- sljit_u8 instruction[4];
- #endif
- SLJIT_ASSERT(dst_xmm_reg < 8);
- /* MOVDQA xmm1, xmm2/m128 */
- #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
- if (src_general_reg < 8)
- {
- instruction[0] = 0x66;
- instruction[1] = 0x0f;
- instruction[2] = 0x6f;
- instruction[3] = (dst_xmm_reg << 3) | src_general_reg;
- sljit_emit_op_custom(compiler, instruction, 4);
- }
- else
- {
- instruction[0] = 0x66;
- instruction[1] = 0x41;
- instruction[2] = 0x0f;
- instruction[3] = 0x6f;
- instruction[4] = (dst_xmm_reg << 3) | (src_general_reg & 0x7);
- sljit_emit_op_custom(compiler, instruction, 4);
- }
- #else
- instruction[0] = 0x66;
- instruction[1] = 0x0f;
- instruction[2] = 0x6f;
- instruction[3] = (dst_xmm_reg << 3) | src_general_reg;
- sljit_emit_op_custom(compiler, instruction, 4);
- #endif
- }
- static void fast_forward_char_pair_sse2_compare(struct sljit_compiler *compiler, PCRE2_UCHAR char1, PCRE2_UCHAR char2,
- sljit_u32 bit, sljit_s32 dst_ind, sljit_s32 cmp1_ind, sljit_s32 cmp2_ind, sljit_s32 tmp_ind)
- {
- sljit_u8 instruction[4];
- instruction[0] = 0x66;
- instruction[1] = 0x0f;
- if (char1 == char2 || bit != 0)
- {
- if (bit != 0)
- {
- /* POR xmm1, xmm2/m128 */
- /* instruction[0] = 0x66; */
- /* instruction[1] = 0x0f; */
- instruction[2] = 0xeb;
- instruction[3] = 0xc0 | (dst_ind << 3) | cmp2_ind;
- sljit_emit_op_custom(compiler, instruction, 4);
- }
- /* PCMPEQB/W/D xmm1, xmm2/m128 */
- /* instruction[0] = 0x66; */
- /* instruction[1] = 0x0f; */
- instruction[2] = 0x74 + SSE2_COMPARE_TYPE_INDEX;
- instruction[3] = 0xc0 | (dst_ind << 3) | cmp1_ind;
- sljit_emit_op_custom(compiler, instruction, 4);
- }
- else
- {
- /* MOVDQA xmm1, xmm2/m128 */
- /* instruction[0] = 0x66; */
- /* instruction[1] = 0x0f; */
- instruction[2] = 0x6f;
- instruction[3] = 0xc0 | (tmp_ind << 3) | dst_ind;
- sljit_emit_op_custom(compiler, instruction, 4);
- /* PCMPEQB/W/D xmm1, xmm2/m128 */
- /* instruction[0] = 0x66; */
- /* instruction[1] = 0x0f; */
- instruction[2] = 0x74 + SSE2_COMPARE_TYPE_INDEX;
- instruction[3] = 0xc0 | (dst_ind << 3) | cmp1_ind;
- sljit_emit_op_custom(compiler, instruction, 4);
- instruction[3] = 0xc0 | (tmp_ind << 3) | cmp2_ind;
- sljit_emit_op_custom(compiler, instruction, 4);
- /* POR xmm1, xmm2/m128 */
- /* instruction[0] = 0x66; */
- /* instruction[1] = 0x0f; */
- instruction[2] = 0xeb;
- instruction[3] = 0xc0 | (dst_ind << 3) | tmp_ind;
- sljit_emit_op_custom(compiler, instruction, 4);
- }
- }
/* Emits an SSE2 search that moves STR_PTR forward to the first code unit
equal to char1 or char2, examining 16 aligned bytes per step.

offset is only used in 8/16 bit UTF mode: when it is positive, the code unit
located offset units before a hit must be the start of a character, otherwise
the search is resumed one code unit further on.

In PCRE2_JIT_COMPLETE mode running past STR_END jumps to failed_match; in the
other modes STR_PTR is limited to STR_END instead.

Uses TMP1, TMP2 and the xmm registers 0-3. */
- static void fast_forward_first_char2_sse2(compiler_common *common, PCRE2_UCHAR char1, PCRE2_UCHAR char2, sljit_s32 offset)
- {
- DEFINE_COMPILER;
- struct sljit_label *start;
- #if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
- struct sljit_label *restart;
- #endif
- struct sljit_jump *quit;
- struct sljit_jump *partial_quit[2];
- sljit_u8 instruction[8];
- sljit_s32 tmp1_ind = sljit_get_register_index(TMP1);
- sljit_s32 str_ptr_ind = sljit_get_register_index(STR_PTR);
/* xmm register assignment. */
- sljit_s32 data_ind = 0;
- sljit_s32 tmp_ind = 1;
- sljit_s32 cmp1_ind = 2;
- sljit_s32 cmp2_ind = 3;
- sljit_u32 bit = 0;
- SLJIT_UNUSED_ARG(offset);
/* When the two characters differ in exactly one bit, a single compare is
enough: that bit is forced on in the data, which is then compared with
char1 | bit. Otherwise bit stays 0 and two compares are emitted. */
- if (char1 != char2)
- {
- bit = char1 ^ char2;
- if (!is_powerof2(bit))
- bit = 0;
- }
- partial_quit[0] = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
- if (common->mode == PCRE2_JIT_COMPLETE)
- add_jump(compiler, &common->failed_match, partial_quit[0]);
- /* First part (unaligned start) */
- OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, character_to_int32(char1 | bit));
- SLJIT_ASSERT(tmp1_ind < 8);
- /* MOVD xmm, r/m32 */
- instruction[0] = 0x66;
- instruction[1] = 0x0f;
- instruction[2] = 0x6e;
- instruction[3] = 0xc0 | (cmp1_ind << 3) | tmp1_ind;
- sljit_emit_op_custom(compiler, instruction, 4);
/* The second pattern is either the differing bit or char2 itself. */
- if (char1 != char2)
- {
- OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, character_to_int32(bit != 0 ? bit : char2));
- /* MOVD xmm, r/m32 */
- instruction[3] = 0xc0 | (cmp2_ind << 3) | tmp1_ind;
- sljit_emit_op_custom(compiler, instruction, 4);
- }
/* TMP2 keeps the unaligned start address. */
- OP1(SLJIT_MOV, TMP2, 0, STR_PTR, 0);
/* A shuffle immediate of 0 copies the lowest 32 bit element into all four
elements. The literal 2 and 3 in the ModRM bytes are cmp1_ind and cmp2_ind
used as source registers. */
- /* PSHUFD xmm1, xmm2/m128, imm8 */
- /* instruction[0] = 0x66; */
- /* instruction[1] = 0x0f; */
- instruction[2] = 0x70;
- instruction[3] = 0xc0 | (cmp1_ind << 3) | 2;
- instruction[4] = 0;
- sljit_emit_op_custom(compiler, instruction, 5);
- if (char1 != char2)
- {
- /* PSHUFD xmm1, xmm2/m128, imm8 */
- instruction[3] = 0xc0 | (cmp2_ind << 3) | 3;
- sljit_emit_op_custom(compiler, instruction, 5);
- }
- #if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
- restart = LABEL();
- #endif
/* Round STR_PTR down to a 16 byte boundary for the aligned load and keep
the distance from that boundary in TMP2. */
- OP2(SLJIT_AND, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, ~0xf);
- OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0xf);
- load_from_mem_sse2(compiler, data_ind, str_ptr_ind);
- fast_forward_char_pair_sse2_compare(compiler, char1, char2, bit, data_ind, cmp1_ind, cmp2_ind, tmp_ind);
- /* PMOVMSKB reg, xmm */
- /* instruction[0] = 0x66; */
- /* instruction[1] = 0x0f; */
- instruction[2] = 0xd7;
- instruction[3] = 0xc0 | (tmp1_ind << 3) | 0;
- sljit_emit_op_custom(compiler, instruction, 4);
/* Restore the real start address and shift out the mask bits that belong to
bytes before it. */
- OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
- OP2(SLJIT_LSHR, TMP1, 0, TMP1, 0, TMP2, 0);
- /* BSF r32, r/m32 */
- instruction[0] = 0x0f;
- instruction[1] = 0xbc;
- instruction[2] = 0xc0 | (tmp1_ind << 3) | tmp1_ind;
- sljit_emit_op_custom(compiler, instruction, 3);
/* BSF was emitted as raw bytes, so sljit has to be told that the zero flag
is valid. A non-zero mask means a hit whose byte index is now in TMP1. */
- sljit_set_current_flags(compiler, SLJIT_SET_Z);
- quit = JUMP(SLJIT_NOT_ZERO);
/* No hit in the first block: go back to the aligned address. */
- OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
- start = LABEL();
- OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, 16);
- partial_quit[1] = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
- if (common->mode == PCRE2_JIT_COMPLETE)
- add_jump(compiler, &common->failed_match, partial_quit[1]);
- /* Second part (aligned) */
- load_from_mem_sse2(compiler, 0, str_ptr_ind);
- fast_forward_char_pair_sse2_compare(compiler, char1, char2, bit, data_ind, cmp1_ind, cmp2_ind, tmp_ind);
- /* PMOVMSKB reg, xmm */
- instruction[0] = 0x66;
- instruction[1] = 0x0f;
- instruction[2] = 0xd7;
- instruction[3] = 0xc0 | (tmp1_ind << 3) | 0;
- sljit_emit_op_custom(compiler, instruction, 4);
- /* BSF r32, r/m32 */
- instruction[0] = 0x0f;
- instruction[1] = 0xbc;
- instruction[2] = 0xc0 | (tmp1_ind << 3) | tmp1_ind;
- sljit_emit_op_custom(compiler, instruction, 3);
- sljit_set_current_flags(compiler, SLJIT_SET_Z);
- JUMPTO(SLJIT_ZERO, start);
/* Hit: add the byte index of the first matching element. */
- JUMPHERE(quit);
- OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
/* The block that contained the hit may extend past STR_END. Outside
complete mode STR_PTR is limited to STR_END (this is also where the two
early exits land); in complete mode such a hit is a failure. */
- if (common->mode != PCRE2_JIT_COMPLETE)
- {
- JUMPHERE(partial_quit[0]);
- JUMPHERE(partial_quit[1]);
- OP2(SLJIT_SUB | SLJIT_SET_GREATER, SLJIT_UNUSED, 0, STR_PTR, 0, STR_END, 0);
- CMOV(SLJIT_GREATER, STR_PTR, STR_END, 0);
- }
- else
- add_jump(compiler, &common->failed_match, CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0));
- #if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
/* The hit only counts when the code unit offset units earlier starts a
character; otherwise step one code unit forward and search again. */
- if (common->utf && offset > 0)
- {
- SLJIT_ASSERT(common->mode == PCRE2_JIT_COMPLETE);
- OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-offset));
- quit = jump_if_utf_char_start(compiler, TMP1);
- OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
- add_jump(compiler, &common->failed_match, CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0));
- OP1(SLJIT_MOV, TMP2, 0, STR_PTR, 0);
- JUMPTO(SLJIT_JUMP, restart);
- JUMPHERE(quit);
- }
- #endif
- }
- #ifndef _WIN64
- static SLJIT_INLINE sljit_u32 max_fast_forward_char_pair_sse2_offset(void)
- {
- #if PCRE2_CODE_UNIT_WIDTH == 8
- return 15;
- #elif PCRE2_CODE_UNIT_WIDTH == 16
- return 7;
- #elif PCRE2_CODE_UNIT_WIDTH == 32
- return 3;
- #else
- #error "Unsupported unit width"
- #endif
- }
/* Emits an SSE2 search for two code units at fixed distances from the start
of a match: the unit at offs1 has to be char1a or char1b while, at the same
time, the unit at offs2 has to be char2a or char2b.

Only valid in PCRE2_JIT_COMPLETE mode and with offs1 > offs2 (both asserted).
STR_PTR is advanced by offs1 for the duration of the search and moved back at
the end, so on success it addresses the candidate match start. Any failure
jumps to failed_match.

Each step compares one aligned 16 byte block ("data1", the offs1 side) and a
second vector ("data2") in which every byte is the one found diff bytes
earlier in the subject, assembled from the current and the preceding block.
The two match masks are then combined with PAND.

Uses TMP1, TMP2, TMP3 and the xmm registers 0-6. */
- static void fast_forward_char_pair_sse2(compiler_common *common, sljit_s32 offs1,
- PCRE2_UCHAR char1a, PCRE2_UCHAR char1b, sljit_s32 offs2, PCRE2_UCHAR char2a, PCRE2_UCHAR char2b)
- {
- DEFINE_COMPILER;
- sljit_u32 bit1 = 0;
- sljit_u32 bit2 = 0;
/* Distance between the two positions (IN_UCHARS presumably converts code
units to bytes - the value is used as a byte shift count below). */
- sljit_u32 diff = IN_UCHARS(offs1 - offs2);
- sljit_s32 tmp1_ind = sljit_get_register_index(TMP1);
- sljit_s32 tmp2_ind = sljit_get_register_index(TMP2);
- sljit_s32 str_ptr_ind = sljit_get_register_index(STR_PTR);
/* xmm register assignment. */
- sljit_s32 data1_ind = 0;
- sljit_s32 data2_ind = 1;
- sljit_s32 tmp_ind = 2;
- sljit_s32 cmp1a_ind = 3;
- sljit_s32 cmp1b_ind = 4;
- sljit_s32 cmp2a_ind = 5;
- sljit_s32 cmp2b_ind = 6;
- struct sljit_label *start;
- #if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
- struct sljit_label *restart;
- #endif
- struct sljit_jump *jump[2];
- sljit_u8 instruction[8];
- SLJIT_ASSERT(common->mode == PCRE2_JIT_COMPLETE && offs1 > offs2);
- SLJIT_ASSERT(diff <= IN_UCHARS(max_fast_forward_char_pair_sse2_offset()));
- SLJIT_ASSERT(tmp1_ind < 8 && tmp2_ind == 1);
- /* Initialize. */
/* With a match end limit, save STR_END in TMP3 and lower it to the stored
limit plus offs1 + 1 code units when that is smaller. */
- if (common->match_end_ptr != 0)
- {
- OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->match_end_ptr);
- OP1(SLJIT_MOV, TMP3, 0, STR_END, 0);
- OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, IN_UCHARS(offs1 + 1));
- OP2(SLJIT_SUB | SLJIT_SET_LESS, SLJIT_UNUSED, 0, TMP1, 0, STR_END, 0);
- CMOV(SLJIT_LESS, STR_END, TMP1, 0);
- }
- OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(offs1));
- add_jump(compiler, &common->failed_match, CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0));
- /* MOVD xmm, r/m32 */
- instruction[0] = 0x66;
- instruction[1] = 0x0f;
- instruction[2] = 0x6e;
/* Comparison patterns for the first position. When the two characters
differ in a single bit the patterns are (char | bit, bit), otherwise they
are the two characters themselves; bit1 tells the compare helper which
form is in use. */
- if (char1a == char1b)
- OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, character_to_int32(char1a));
- else
- {
- bit1 = char1a ^ char1b;
- if (is_powerof2(bit1))
- {
- OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, character_to_int32(char1a | bit1));
- OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, character_to_int32(bit1));
- }
- else
- {
- bit1 = 0;
- OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, character_to_int32(char1a));
- OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, character_to_int32(char1b));
- }
- }
- instruction[3] = 0xc0 | (cmp1a_ind << 3) | tmp1_ind;
- sljit_emit_op_custom(compiler, instruction, 4);
- if (char1a != char1b)
- {
- instruction[3] = 0xc0 | (cmp1b_ind << 3) | tmp2_ind;
- sljit_emit_op_custom(compiler, instruction, 4);
- }
/* The same for the second position. */
- if (char2a == char2b)
- OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, character_to_int32(char2a));
- else
- {
- bit2 = char2a ^ char2b;
- if (is_powerof2(bit2))
- {
- OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, character_to_int32(char2a | bit2));
- OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, character_to_int32(bit2));
- }
- else
- {
- bit2 = 0;
- OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, character_to_int32(char2a));
- OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, character_to_int32(char2b));
- }
- }
- instruction[3] = 0xc0 | (cmp2a_ind << 3) | tmp1_ind;
- sljit_emit_op_custom(compiler, instruction, 4);
- if (char2a != char2b)
- {
- instruction[3] = 0xc0 | (cmp2b_ind << 3) | tmp2_ind;
- sljit_emit_op_custom(compiler, instruction, 4);
- }
/* A shuffle immediate of 0 copies the lowest 32 bit element of each pattern
register into all four elements. */
- /* PSHUFD xmm1, xmm2/m128, imm8 */
- /* instruction[0] = 0x66; */
- /* instruction[1] = 0x0f; */
- instruction[2] = 0x70;
- instruction[4] = 0;
- instruction[3] = 0xc0 | (cmp1a_ind << 3) | cmp1a_ind;
- sljit_emit_op_custom(compiler, instruction, 5);
- if (char1a != char1b)
- {
- instruction[3] = 0xc0 | (cmp1b_ind << 3) | cmp1b_ind;
- sljit_emit_op_custom(compiler, instruction, 5);
- }
- instruction[3] = 0xc0 | (cmp2a_ind << 3) | cmp2a_ind;
- sljit_emit_op_custom(compiler, instruction, 5);
- if (char2a != char2b)
- {
- instruction[3] = 0xc0 | (cmp2b_ind << 3) | cmp2b_ind;
- sljit_emit_op_custom(compiler, instruction, 5);
- }
- #if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
- restart = LABEL();
- #endif
/* First, possibly unaligned step. TMP1 is the address of the second
position, TMP2 keeps the unaligned STR_PTR; STR_PTR and TMP1 are then
rounded down to 16 byte boundaries. */
- OP2(SLJIT_SUB, TMP1, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(offs1 - offs2));
- OP1(SLJIT_MOV, TMP2, 0, STR_PTR, 0);
- OP2(SLJIT_AND, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, ~0xf);
- OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, ~0xf);
- load_from_mem_sse2(compiler, data1_ind, str_ptr_ind);
/* If both positions lie in the same aligned block the preceding block is
not loaded at all. */
- jump[0] = CMP(SLJIT_EQUAL, STR_PTR, 0, TMP1, 0);
/* Different blocks: data2 = (previous block >> (16 - diff) bytes) combined
with (current block << diff bytes). */
- load_from_mem_sse2(compiler, data2_ind, tmp1_ind);
- /* MOVDQA xmm1, xmm2/m128 */
- /* instruction[0] = 0x66; */
- /* instruction[1] = 0x0f; */
- instruction[2] = 0x6f;
- instruction[3] = 0xc0 | (tmp_ind << 3) | data1_ind;
- sljit_emit_op_custom(compiler, instruction, 4);
/* In the two byte shifts below the reg field of the ModRM byte selects the
operation: 7 is PSLLDQ, 3 is PSRLDQ. */
- /* PSLLDQ xmm1, xmm2/m128, imm8 */
- /* instruction[0] = 0x66; */
- /* instruction[1] = 0x0f; */
- instruction[2] = 0x73;
- instruction[3] = 0xc0 | (7 << 3) | tmp_ind;
- instruction[4] = diff;
- sljit_emit_op_custom(compiler, instruction, 5);
- /* PSRLDQ xmm1, xmm2/m128, imm8 */
- /* instruction[0] = 0x66; */
- /* instruction[1] = 0x0f; */
- /* instruction[2] = 0x73; */
- instruction[3] = 0xc0 | (3 << 3) | data2_ind;
- instruction[4] = 16 - diff;
- sljit_emit_op_custom(compiler, instruction, 5);
- /* POR xmm1, xmm2/m128 */
- /* instruction[0] = 0x66; */
- /* instruction[1] = 0x0f; */
- instruction[2] = 0xeb;
- instruction[3] = 0xc0 | (data2_ind << 3) | tmp_ind;
- sljit_emit_op_custom(compiler, instruction, 4);
- jump[1] = JUMP(SLJIT_JUMP);
/* Same block: data2 is simply the current block shifted up by diff bytes. */
- JUMPHERE(jump[0]);
- /* MOVDQA xmm1, xmm2/m128 */
- /* instruction[0] = 0x66; */
- /* instruction[1] = 0x0f; */
- instruction[2] = 0x6f;
- instruction[3] = 0xc0 | (data2_ind << 3) | data1_ind;
- sljit_emit_op_custom(compiler, instruction, 4);
- /* PSLLDQ xmm1, xmm2/m128, imm8 */
- /* instruction[0] = 0x66; */
- /* instruction[1] = 0x0f; */
- instruction[2] = 0x73;
- instruction[3] = 0xc0 | (7 << 3) | data2_ind;
- instruction[4] = diff;
- sljit_emit_op_custom(compiler, instruction, 5);
- JUMPHERE(jump[1]);
/* TMP2 becomes the distance of the start address from the block start. */
- OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0xf);
- fast_forward_char_pair_sse2_compare(compiler, char2a, char2b, bit2, data2_ind, cmp2a_ind, cmp2b_ind, tmp_ind);
- fast_forward_char_pair_sse2_compare(compiler, char1a, char1b, bit1, data1_ind, cmp1a_ind, cmp1b_ind, tmp_ind);
/* Keep only the elements where both positions matched. */
- /* PAND xmm1, xmm2/m128 */
- /* instruction[0] = 0x66; */
- /* instruction[1] = 0x0f; */
- instruction[2] = 0xdb;
- instruction[3] = 0xc0 | (data1_ind << 3) | data2_ind;
- sljit_emit_op_custom(compiler, instruction, 4);
/* The literal 0 in the ModRM byte is the source register, data1_ind. */
- /* PMOVMSKB reg, xmm */
- /* instruction[0] = 0x66; */
- /* instruction[1] = 0x0f; */
- instruction[2] = 0xd7;
- instruction[3] = 0xc0 | (tmp1_ind << 3) | 0;
- sljit_emit_op_custom(compiler, instruction, 4);
- /* Ignore matches before the first STR_PTR. */
- OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
- OP2(SLJIT_LSHR, TMP1, 0, TMP1, 0, TMP2, 0);
- /* BSF r32, r/m32 */
- instruction[0] = 0x0f;
- instruction[1] = 0xbc;
- instruction[2] = 0xc0 | (tmp1_ind << 3) | tmp1_ind;
- sljit_emit_op_custom(compiler, instruction, 3);
/* BSF was emitted as raw bytes, so sljit has to be told that the zero flag
is valid. A non-zero mask means a hit whose byte index is now in TMP1. */
- sljit_set_current_flags(compiler, SLJIT_SET_Z);
- jump[0] = JUMP(SLJIT_NOT_ZERO);
/* No hit in the first block: go back to the aligned address. */
- OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
- /* Main loop. */
/* The first two instruction bytes were overwritten by the BSF encoding. */
- instruction[0] = 0x66;
- instruction[1] = 0x0f;
- start = LABEL();
/* The block just examined is loaded as the "previous" one (data2), then
STR_PTR moves to the next block, which is loaded as data1. */
- load_from_mem_sse2(compiler, data2_ind, str_ptr_ind);
- OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, 16);
- add_jump(compiler, &common->failed_match, CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0));
- load_from_mem_sse2(compiler, data1_ind, str_ptr_ind);
- /* PSRLDQ xmm1, xmm2/m128, imm8 */
- /* instruction[0] = 0x66; */
- /* instruction[1] = 0x0f; */
- instruction[2] = 0x73;
- instruction[3] = 0xc0 | (3 << 3) | data2_ind;
- instruction[4] = 16 - diff;
- sljit_emit_op_custom(compiler, instruction, 5);
- /* MOVDQA xmm1, xmm2/m128 */
- /* instruction[0] = 0x66; */
- /* instruction[1] = 0x0f; */
- instruction[2] = 0x6f;
- instruction[3] = 0xc0 | (tmp_ind << 3) | data1_ind;
- sljit_emit_op_custom(compiler, instruction, 4);
- /* PSLLDQ xmm1, xmm2/m128, imm8 */
- /* instruction[0] = 0x66; */
- /* instruction[1] = 0x0f; */
- instruction[2] = 0x73;
- instruction[3] = 0xc0 | (7 << 3) | tmp_ind;
- instruction[4] = diff;
- sljit_emit_op_custom(compiler, instruction, 5);
- /* POR xmm1, xmm2/m128 */
- /* instruction[0] = 0x66; */
- /* instruction[1] = 0x0f; */
- instruction[2] = 0xeb;
- instruction[3] = 0xc0 | (data2_ind << 3) | tmp_ind;
- sljit_emit_op_custom(compiler, instruction, 4);
- fast_forward_char_pair_sse2_compare(compiler, char1a, char1b, bit1, data1_ind, cmp1a_ind, cmp1b_ind, tmp_ind);
- fast_forward_char_pair_sse2_compare(compiler, char2a, char2b, bit2, data2_ind, cmp2a_ind, cmp2b_ind, tmp_ind);
- /* PAND xmm1, xmm2/m128 */
- /* instruction[0] = 0x66; */
- /* instruction[1] = 0x0f; */
- instruction[2] = 0xdb;
- instruction[3] = 0xc0 | (data1_ind << 3) | data2_ind;
- sljit_emit_op_custom(compiler, instruction, 4);
- /* PMOVMSKB reg, xmm */
- /* instruction[0] = 0x66; */
- /* instruction[1] = 0x0f; */
- instruction[2] = 0xd7;
- instruction[3] = 0xc0 | (tmp1_ind << 3) | 0;
- sljit_emit_op_custom(compiler, instruction, 4);
- /* BSF r32, r/m32 */
- instruction[0] = 0x0f;
- instruction[1] = 0xbc;
- instruction[2] = 0xc0 | (tmp1_ind << 3) | tmp1_ind;
- sljit_emit_op_custom(compiler, instruction, 3);
- sljit_set_current_flags(compiler, SLJIT_SET_Z);
- JUMPTO(SLJIT_ZERO, start);
/* Hit: add the byte index of the first matching element and make sure it is
still inside the subject. */
- JUMPHERE(jump[0]);
- OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
- add_jump(compiler, &common->failed_match, CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0));
/* Reload STR_END from the match end slot before the UTF check below; the
original STR_END saved in TMP3 is restored at the very end. */
- if (common->match_end_ptr != 0)
- OP1(SLJIT_MOV, STR_END, 0, SLJIT_MEM1(SLJIT_SP), common->match_end_ptr);
- #if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
/* The candidate start (offs1 units back) has to be the first unit of a
character; otherwise step one code unit forward and search again, or fail
when the end has been reached. */
- if (common->utf)
- {
- OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-offs1));
- jump[0] = jump_if_utf_char_start(compiler, TMP1);
- OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
- CMPTO(SLJIT_LESS, STR_PTR, 0, STR_END, 0, restart);
- add_jump(compiler, &common->failed_match, JUMP(SLJIT_JUMP));
- JUMPHERE(jump[0]);
- }
- #endif
/* Undo the initial advance so that STR_PTR addresses the match start. */
- OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(offs1));
- if (common->match_end_ptr != 0)
- OP1(SLJIT_MOV, STR_END, 0, TMP3, 0);
- }
- static BOOL check_fast_forward_char_pair_sse2(compiler_common *common, fast_forward_char_data *chars, int max)
- {
- sljit_s32 i, j, priority, count;
- sljit_u32 priorities;
- PCRE2_UCHAR a1, a2, b1, b2;
- priorities = 0;
- count = 0;
- for (i = 0; i < max; i++)
- {
- if (chars[i].last_count > 2)
- {
- SLJIT_ASSERT(chars[i].last_count <= 7);
- priorities |= (1 << chars[i].last_count);
- count++;
- }
- }
- if (count < 2)
- return FALSE;
- for (priority = 7; priority > 2; priority--)
- {
- if ((priorities & (1 << priority)) == 0)
- continue;
- for (i = max - 1; i >= 1; i--)
- if (chars[i].last_count >= priority)
- {
- SLJIT_ASSERT(chars[i].count <= 2 && chars[i].count >= 1);
- a1 = chars[i].chars[0];
- a2 = chars[i].chars[1];
- j = i - max_fast_forward_char_pair_sse2_offset();
- if (j < 0)
- j = 0;
- while (j < i)
- {
- if (chars[j].last_count >= priority)
- {
- b1 = chars[j].chars[0];
- b2 = chars[j].chars[1];
- if (a1 != b1 && a1 != b2 && a2 != b1 && a2 != b2)
- {
- fast_forward_char_pair_sse2(common, i, a1, a2, j, b1, b2);
- return TRUE;
- }
- }
- j++;
- }
- }
- }
- return FALSE;
- }
- #endif
- #undef SSE2_COMPARE_TYPE_INDEX
- #endif
/* Emits code that moves STR_PTR forward to the first position whose code
unit at distance offset is char1 or char2 (pass the same value twice to
search for a single character).

A positive offset is only allowed in PCRE2_JIT_COMPLETE mode (asserted).
STR_PTR is advanced by offset while searching and moved back afterwards, so
on success it addresses the candidate match start.

When common->match_end_ptr is set, STR_END is saved in TMP3, lowered for the
duration of the search to the stored limit plus offset + 1 code units if that
is smaller, and restored before returning.

On x86 builds without valgrind support the SSE2 variant is used when the CPU
provides SSE2; otherwise a loop testing one code unit at a time is emitted. */
- static void fast_forward_first_char2(compiler_common *common, PCRE2_UCHAR char1, PCRE2_UCHAR char2, sljit_s32 offset)
- {
- DEFINE_COMPILER;
- struct sljit_label *start;
- struct sljit_jump *match;
- struct sljit_jump *partial_quit;
- PCRE2_UCHAR mask;
- BOOL has_match_end = (common->match_end_ptr != 0);
- SLJIT_ASSERT(common->mode == PCRE2_JIT_COMPLETE || offset == 0);
- if (has_match_end)
- OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->match_end_ptr);
- if (offset > 0)
- OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(offset));
- if (has_match_end)
- {
/* STR_END = min(STR_END, limit + offset + 1); the old value stays in TMP3. */
- OP1(SLJIT_MOV, TMP3, 0, STR_END, 0);
- OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, IN_UCHARS(offset + 1));
- OP2(SLJIT_SUB | SLJIT_SET_GREATER, SLJIT_UNUSED, 0, STR_END, 0, TMP1, 0);
- CMOV(SLJIT_GREATER, STR_END, TMP1, 0);
- }
- #if (defined SLJIT_CONFIG_X86 && SLJIT_CONFIG_X86) && !(defined SUPPORT_VALGRIND)
- /* SSE2 accelerated first character search. */
- if (sljit_has_cpu_feature(SLJIT_HAS_SSE2))
- {
- fast_forward_first_char2_sse2(common, char1, char2, offset);
/* The SSE2 search stops on the matching code unit itself, so only offset
has to be undone here. */
- if (offset > 0)
- OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(offset));
- if (has_match_end)
- OP1(SLJIT_MOV, STR_END, 0, TMP3, 0);
- return;
- }
- #endif
/* Generic loop: load one code unit, step past it, and go back to start
until it is one of the two characters. */
- start = LABEL();
- partial_quit = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
- if (common->mode == PCRE2_JIT_COMPLETE)
- add_jump(compiler, &common->failed_match, partial_quit);
- OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
- OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
- if (char1 == char2)
- CMPTO(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, char1, start);
- else
- {
- mask = char1 ^ char2;
/* Characters differing in a single bit are tested with one compare after
forcing that bit on; otherwise two compares are needed. */
- if (is_powerof2(mask))
- {
- OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, mask);
- CMPTO(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, char1 | mask, start);
- }
- else
- {
- match = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, char1);
- CMPTO(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, char2, start);
- JUMPHERE(match);
- }
- }
- #if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
/* STR_PTR is already one unit past the hit, hence offset + 1: the candidate
match start has to be the first code unit of a character, otherwise the
search continues. */
- if (common->utf && offset > 0)
- {
- OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-(offset + 1)));
- jumpto_if_not_utf_char_start(compiler, TMP1, start);
- }
- #endif
/* Step back over the matching unit and the initial advance. */
- OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(offset + 1));
/* Outside complete mode the end-of-subject exit lands after the adjustment
above, leaving STR_PTR as it was when the end was detected. */
- if (common->mode != PCRE2_JIT_COMPLETE)
- JUMPHERE(partial_quit);
- if (has_match_end)
- OP1(SLJIT_MOV, STR_END, 0, TMP3, 0);
- }
- static SLJIT_INLINE BOOL fast_forward_first_n_chars(compiler_common *common)
- {
- DEFINE_COMPILER;
- struct sljit_label *start;
- struct sljit_jump *match;
- fast_forward_char_data chars[MAX_N_CHARS];
- sljit_s32 offset;
- PCRE2_UCHAR mask;
- PCRE2_UCHAR *char_set, *char_set_end;
- int i, max, from;
- int range_right = -1, range_len;
- sljit_u8 *update_table = NULL;
- BOOL in_range;
- sljit_u32 rec_count;
- for (i = 0; i < MAX_N_CHARS; i++)
- {
- chars[i].count = 0;
- chars[i].last_count = 0;
- }
- rec_count = 10000;
- max = scan_prefix(common, common->start, chars, MAX_N_CHARS, &rec_count);
- if (max < 1)
- return FALSE;
- /* Convert last_count to priority. */
- for (i = 0; i < max; i++)
- {
- SLJIT_ASSERT(chars[i].count > 0 && chars[i].last_count <= chars[i].count);
- if (chars[i].count == 1)
- {
- chars[i].last_count = (chars[i].last_count == 1) ? 7 : 5;
- /* Simplifies algorithms later. */
- chars[i].chars[1] = chars[i].chars[0];
- }
- else if (chars[i].count == 2)
- {
- SLJIT_ASSERT(chars[i].chars[0] != chars[i].chars[1]);
- if (is_powerof2(chars[i].chars[0] ^ chars[i].chars[1]))
- chars[i].last_count = (chars[i].last_count == 2) ? 6 : 4;
- else
- chars[i].last_count = (chars[i].last_count == 2) ? 3 : 2;
- }
- else
- chars[i].last_count = (chars[i].count == 255) ? 0 : 1;
- }
- #if (defined SLJIT_CONFIG_X86 && SLJIT_CONFIG_X86) && !(defined SUPPORT_VALGRIND) && !(defined _WIN64)
- if (check_fast_forward_char_pair_sse2(common, chars, max))
- return TRUE;
- #endif
- in_range = FALSE;
- /* Prevent compiler "uninitialized" warning */
- from = 0;
- range_len = 4 /* minimum length */ - 1;
- for (i = 0; i <= max; i++)
- {
- if (in_range && (i - from) > range_len && (chars[i - 1].count < 255))
- {
- range_len = i - from;
- range_right = i - 1;
- }
- if (i < max && chars[i].count < 255)
- {
- SLJIT_ASSERT(chars[i].count > 0);
- if (!in_range)
- {
- in_range = TRUE;
- from = i;
- }
- }
- else
- in_range = FALSE;
- }
- if (range_right >= 0)
- {
- update_table = (sljit_u8 *)allocate_read_only_data(common, 256);
- if (update_table == NULL)
- return TRUE;
- memset(update_table, IN_UCHARS(range_len), 256);
- for (i = 0; i < range_len; i++)
- {
- SLJIT_ASSERT(chars[range_right - i].count > 0 && chars[range_right - i].count < 255);
- char_set = chars[range_right - i].chars;
- char_set_end = char_set + chars[range_right - i].count;
- do
- {
- if (update_table[(*char_set) & 0xff] > IN_UCHARS(i))
- update_table[(*char_set) & 0xff] = IN_UCHARS(i);
- char_set++;
- }
- while (char_set < char_set_end);
- }
- }
- offset = -1;
- /* Scan forward. */
- for (i = 0; i < max; i++)
- {
- if (range_right == i)
- continue;
- if (offset == -1)
- {
- if (chars[i].last_count >= 2)
- offset = i;
- }
- else if (chars[offset].last_count < chars[i].last_count)
- offset = i;
- }
- SLJIT_ASSERT(offset == -1 || (chars[offset].count >= 1 && chars[offset].count <= 2));
- if (range_right < 0)
- {
- if (offset < 0)
- return FALSE;
- /* Works regardless the value is 1 or 2. */
- fast_forward_first_char2(common, chars[offset].chars[0], chars[offset].chars[1], offset);
- return TRUE;
- }
- SLJIT_ASSERT(range_right != offset);
- if (common->match_end_ptr != 0)
- {
- OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->match_end_ptr);
- OP1(SLJIT_MOV, TMP3, 0, STR_END, 0);
- OP2(SLJIT_SUB, STR_END, 0, STR_END, 0, SLJIT_IMM, IN_UCHARS(max));
- OP2(SLJIT_SUB | SLJIT_SET_GREATER, SLJIT_UNUSED, 0, STR_END, 0, TMP1, 0);
- CMOV(SLJIT_GREATER, STR_END, TMP1, 0);
- }
- else
- OP2(SLJIT_SUB, STR_END, 0, STR_END, 0, SLJIT_IMM, IN_UCHARS(max));
- SLJIT_ASSERT(range_right >= 0);
- #if !(defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
- OP1(SLJIT_MOV, RETURN_ADDR, 0, SLJIT_IMM, (sljit_sw)update_table);
- #endif
- start = LABEL();
- add_jump(compiler, &common->failed_match, CMP(SLJIT_GREATER, STR_PTR, 0, STR_END, 0));
- #if PCRE2_CODE_UNIT_WIDTH == 8 || (defined SLJIT_LITTLE_ENDIAN && SLJIT_LITTLE_ENDIAN)
- OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(range_right));
- #else
- OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(range_right + 1) - 1);
- #endif
- #if !(defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
- OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM2(RETURN_ADDR, TMP1), 0);
- #else
- OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)update_table);
- #endif
- OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
- CMPTO(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, 0, start);
- if (offset >= 0)
- {
- OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(offset));
- OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
- if (chars[offset].count == 1)
- CMPTO(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, chars[offset].chars[0], start);
- else
- {
- mask = chars[offset].chars[0] ^ chars[offset].chars[1];
- if (is_powerof2(mask))
- {
- OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, mask);
- CMPTO(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, chars[offset].chars[0] | mask, start);
- }
- else
- {
- match = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, chars[offset].chars[0]);
- CMPTO(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, chars[offset].chars[1], start);
- JUMPHERE(match);
- }
- }
- }
- #if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
- if (common->utf && offset != 0)
- {
- if (offset < 0)
- {
- OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
- OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
- }
- else
- OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-1));
- jumpto_if_not_utf_char_start(compiler, TMP1, start);
- if (offset < 0)
- OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
- }
- #endif
- if (offset >= 0)
- OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
- if (common->match_end_ptr != 0)
- OP1(SLJIT_MOV, STR_END, 0, TMP3, 0);
- else
- OP2(SLJIT_ADD, STR_END, 0, STR_END, 0, SLJIT_IMM, IN_UCHARS(max));
- return TRUE;
- }
- static SLJIT_INLINE void fast_forward_first_char(compiler_common *common)
- {
- /* Hands the pattern's first code unit, together with its other-case form
-    when PCRE2_FIRSTCASELESS is set, to fast_forward_first_char2() with
-    offset 0. Without caseless matching both values are the same unit. */
- const PCRE2_UCHAR lead = (PCRE2_UCHAR)(common->re->first_codeunit);
- PCRE2_UCHAR alt = lead;
- if ((common->re->flags & PCRE2_FIRSTCASELESS) != 0)
-   {
-   /* Table lookup first; units outside the table keep the unit itself. */
-   alt = TABLE_GET(lead, common->fcc, lead);
- #if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 8
-   /* Wide code units in UTF mode take the other case from the UCD data. */
-   if (common->utf && lead > 127)
-     alt = UCD_OTHERCASE(lead);
- #endif
-   }
- fast_forward_first_char2(common, lead, alt, 0);
- }
- static SLJIT_INLINE void fast_forward_newline(compiler_common *common)
- { /* Emits code that moves STR_PTR forward until it stands just after a
-      newline (selected by common->nltype / common->newline) or until it
-      reaches STR_END. When STR_PTR is at or before jit_arguments.str the
-      search is skipped entirely (firstchar jump). */
- DEFINE_COMPILER;
- struct sljit_label *loop;
- struct sljit_jump *lastchar;
- struct sljit_jump *firstchar;
- struct sljit_jump *quit;
- struct sljit_jump *foundcr = NULL;
- struct sljit_jump *notfoundnl;
- jump_list *newline = NULL;
- if (common->match_end_ptr != 0)
- {
- OP1(SLJIT_MOV, TMP3, 0, STR_END, 0); /* Keep the real STR_END in TMP3. */
- OP1(SLJIT_MOV, STR_END, 0, SLJIT_MEM1(SLJIT_SP), common->match_end_ptr); /* Limit the scan to the value in the match_end_ptr slot. */
- }
- if (common->nltype == NLTYPE_FIXED && common->newline > 255)
- { /* Fixed newline of two code units: (newline >> 8) & 0xff followed by newline & 0xff. */
- lastchar = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
- OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
- OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, str));
- OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, begin));
- firstchar = CMP(SLJIT_LESS_EQUAL, STR_PTR, 0, TMP2, 0);
- OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, IN_UCHARS(2));
- OP2(SLJIT_SUB | SLJIT_SET_GREATER_EQUAL, SLJIT_UNUSED, 0, STR_PTR, 0, TMP1, 0);
- OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_GREATER_EQUAL); /* TMP2 = 1 when at least two units lie between begin and STR_PTR, else 0. */
- #if PCRE2_CODE_UNIT_WIDTH == 16 || PCRE2_CODE_UNIT_WIDTH == 32
- OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, UCHAR_SHIFT); /* Scale the 0/1 flag to a code unit size. */
- #endif
- OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, TMP2, 0); /* Step back by that amount; the loop below re-advances first. */
- loop = LABEL();
- OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
- quit = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
- OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-2)); /* The two units just before STR_PTR are tested. */
- OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-1));
- CMPTO(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff, loop);
- CMPTO(SLJIT_NOT_EQUAL, TMP2, 0, SLJIT_IMM, common->newline & 0xff, loop);
- JUMPHERE(quit);
- JUMPHERE(firstchar);
- JUMPHERE(lastchar);
- if (common->match_end_ptr != 0)
- OP1(SLJIT_MOV, STR_END, 0, TMP3, 0); /* Restore the real STR_END. */
- return;
- }
- OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0); /* General case: every other newline convention. */
- OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, str));
- firstchar = CMP(SLJIT_LESS_EQUAL, STR_PTR, 0, TMP2, 0);
- skip_char_back(common); /* NOTE(review): presumably moves STR_PTR back one character - confirm in helper. */
- loop = LABEL();
- common->ff_newline_shortcut = loop; /* The loop entry is published in common for use elsewhere. */
- read_char_range(common, common->nlmin, common->nlmax, TRUE);
- lastchar = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
- if (common->nltype == NLTYPE_ANY || common->nltype == NLTYPE_ANYCRLF)
- foundcr = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_CR); /* CR gets separate handling so a following NL can be consumed. */
- check_newlinechar(common, common->nltype, &newline, FALSE);
- set_jumps(newline, loop); /* Jumps collected by check_newlinechar() continue the scan (presumably the "not a newline" case). */
- if (common->nltype == NLTYPE_ANY || common->nltype == NLTYPE_ANYCRLF)
- {
- quit = JUMP(SLJIT_JUMP);
- JUMPHERE(foundcr);
- notfoundnl = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
- OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
- OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, CHAR_NL);
- OP_FLAGS(SLJIT_MOV, TMP1, 0, SLJIT_EQUAL); /* TMP1 = 1 when the unit at STR_PTR is NL, else 0. */
- #if PCRE2_CODE_UNIT_WIDTH == 16 || PCRE2_CODE_UNIT_WIDTH == 32
- OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, UCHAR_SHIFT);
- #endif
- OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0); /* Skip the NL of a CR NL pair. */
- JUMPHERE(notfoundnl);
- JUMPHERE(quit);
- }
- JUMPHERE(lastchar);
- JUMPHERE(firstchar);
- if (common->match_end_ptr != 0)
- OP1(SLJIT_MOV, STR_END, 0, TMP3, 0); /* Restore the real STR_END. */
- }
- static BOOL optimize_class(compiler_common *common, const sljit_u8 *bits, BOOL nclass, BOOL invert, jump_list **backtracks); /* Forward declaration: used below before its definition. */
- static SLJIT_INLINE void fast_forward_start_bits(compiler_common *common)
- { /* Emits a loop that advances STR_PTR to the first code unit accepted by
-      common->re->start_bitmap. optimize_class() is tried first; if it
-      declines, an explicit byte load plus bit test on the bitmap is emitted.
-      On exit STR_PTR points at the accepted unit (the read advance is undone). */
- DEFINE_COMPILER;
- const sljit_u8 *start_bits = common->re->start_bitmap;
- struct sljit_label *start;
- struct sljit_jump *partial_quit;
- #if PCRE2_CODE_UNIT_WIDTH != 8
- struct sljit_jump *found = NULL;
- #endif
- jump_list *matches = NULL;
- if (common->match_end_ptr != 0)
- {
- OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->match_end_ptr);
- OP1(SLJIT_MOV, RETURN_ADDR, 0, STR_END, 0); /* Park the real STR_END in RETURN_ADDR. */
- OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, IN_UCHARS(1));
- OP2(SLJIT_SUB | SLJIT_SET_GREATER, SLJIT_UNUSED, 0, STR_END, 0, TMP1, 0);
- CMOV(SLJIT_GREATER, STR_END, TMP1, 0); /* STR_END = min(STR_END, stored match end + one unit). */
- }
- start = LABEL();
- partial_quit = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
- if (common->mode == PCRE2_JIT_COMPLETE)
- add_jump(compiler, &common->failed_match, partial_quit); /* Complete mode: running out of subject is a failed match. */
- OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
- OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
- if (!optimize_class(common, start_bits, (start_bits[31] & 0x80) != 0, FALSE, &matches))
- { /* Fallback: test bit (TMP1 & 7) of byte start_bits[TMP1 >> 3]. */
- #if PCRE2_CODE_UNIT_WIDTH != 8
- if ((start_bits[31] & 0x80) != 0)
- found = CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 255); /* Bit 255 set: units >= 255 are accepted without a table lookup. */
- else
- CMPTO(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 255, start); /* Bit 255 clear: units >= 255 are skipped. */
- #elif defined SUPPORT_UNICODE
- if (common->utf && is_char7_bitset(start_bits, FALSE))
- CMPTO(SLJIT_GREATER, TMP1, 0, SLJIT_IMM, 127, start); /* NOTE(review): presumably the bitmap only holds 7-bit characters here - confirm is_char7_bitset(). */
- #endif
- OP2(SLJIT_AND, TMP2, 0, TMP1, 0, SLJIT_IMM, 0x7);
- OP2(SLJIT_LSHR, TMP1, 0, TMP1, 0, SLJIT_IMM, 3);
- OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)start_bits);
- if (sljit_get_register_index(TMP3) >= 0)
- {
- OP2(SLJIT_SHL, TMP3, 0, SLJIT_IMM, 1, TMP2, 0); /* Mask 1 << bit index is built in TMP3 when this check passes ... */
- OP2(SLJIT_AND | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, TMP3, 0);
- }
- else
- {
- OP2(SLJIT_SHL, TMP2, 0, SLJIT_IMM, 1, TMP2, 0); /* ... otherwise it is built in place in TMP2. */
- OP2(SLJIT_AND | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, TMP2, 0);
- }
- JUMPTO(SLJIT_ZERO, start); /* Bit clear: try the next code unit. */
- }
- else
- set_jumps(matches, start); /* Jumps produced by optimize_class() restart the loop. */
- #if PCRE2_CODE_UNIT_WIDTH != 8
- if (found != NULL)
- JUMPHERE(found);
- #endif
- OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1)); /* Undo the advance so STR_PTR points at the accepted unit. */
- if (common->mode != PCRE2_JIT_COMPLETE)
- JUMPHERE(partial_quit); /* Other modes: end of subject simply leaves the loop (no decrement). */
- if (common->match_end_ptr != 0)
- OP1(SLJIT_MOV, STR_END, 0, RETURN_ADDR, 0); /* Restore the real STR_END. */
- }
- static SLJIT_INLINE struct sljit_jump *search_requested_char(compiler_common *common, PCRE2_UCHAR req_char, BOOL caseless, BOOL has_firstchar)
- {
- /* Emits a forward scan for the required code unit req_char (or its other
-    case when caseless is set). The position where it was found is cached in
-    the stack slot common->req_char_ptr. The scan is skipped when
-    STR_PTR + REQ_CU_MAX is still below STR_END, or when STR_PTR is below the
-    cached position. With has_firstchar the scan starts one unit after
-    STR_PTR. Returns the jump that is taken when the scan reaches STR_END
-    without a hit; the caller has to bind it. */
- DEFINE_COMPILER;
- struct sljit_label *scan;
- struct sljit_jump *skip_far;
- struct sljit_jump *skip_cached;
- struct sljit_jump *hit;
- struct sljit_jump *hit_other = NULL;
- struct sljit_jump *exhausted;
- sljit_u32 other = req_char;
- sljit_u32 diff;
- SLJIT_ASSERT(common->req_char_ptr != 0);
- /* Work out the other-case unit up front; this emits no code. */
- if (caseless)
-   {
-   other = TABLE_GET(req_char, common->fcc, req_char);
- #if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 8
-   if (common->utf && req_char > 127)
-     other = UCD_OTHERCASE(req_char);
- #endif
-   }
- diff = req_char ^ other;
- OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), common->req_char_ptr);
- OP2(SLJIT_ADD, TMP1, 0, STR_PTR, 0, SLJIT_IMM, REQ_CU_MAX);
- skip_far = CMP(SLJIT_LESS, TMP1, 0, STR_END, 0);
- skip_cached = CMP(SLJIT_LESS, STR_PTR, 0, TMP2, 0);
- /* TMP1 becomes the scan cursor. */
- if (!has_firstchar)
-   OP1(SLJIT_MOV, TMP1, 0, STR_PTR, 0);
- else
-   OP2(SLJIT_ADD, TMP1, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
- scan = LABEL();
- exhausted = CMP(SLJIT_GREATER_EQUAL, TMP1, 0, STR_END, 0);
- OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(TMP1), 0);
- if (diff == 0)
-   {
-   /* Only one form to look for. */
-   hit = CMP(SLJIT_EQUAL, TMP2, 0, SLJIT_IMM, req_char);
-   }
- else if (is_powerof2(diff))
-   {
-   /* The two cases differ in a single bit: force it on, compare once. */
-   OP2(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_IMM, diff);
-   hit = CMP(SLJIT_EQUAL, TMP2, 0, SLJIT_IMM, req_char | diff);
-   }
- else
-   {
-   hit = CMP(SLJIT_EQUAL, TMP2, 0, SLJIT_IMM, req_char);
-   hit_other = CMP(SLJIT_EQUAL, TMP2, 0, SLJIT_IMM, other);
-   }
- OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, IN_UCHARS(1));
- JUMPTO(SLJIT_JUMP, scan);
- JUMPHERE(hit);
- if (hit_other != NULL)
-   JUMPHERE(hit_other);
- /* Remember where the unit was seen. */
- OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->req_char_ptr, TMP1, 0);
- JUMPHERE(skip_cached);
- JUMPHERE(skip_far);
- return exhausted;
- }
- static void do_revertframes(compiler_common *common)
- {
- /* Emits the fast-call routine that replays saved entries found just below
-    STACK_TOP. Each pass inspects the word at STACK_TOP - 1 word:
-      > 0   it is an offset from the local base; the next two stack words are
-            stored at that location and the word after it, and three words
-            are popped;
-      == 0  reverting is finished and the routine fast-returns through
-            RETURN_ADDR;
-      < 0   its negation is the offset; one stack word is stored there and
-            two words are popped. */
- DEFINE_COMPILER;
- struct sljit_label *next_entry;
- struct sljit_jump *not_positive;
- struct sljit_jump *is_negative;
- sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
- GET_LOCAL_BASE(TMP1, 0, 0);
- /* Drop entries until a zero word is reached. */
- next_entry = LABEL();
- OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), -sizeof(sljit_sw));
- not_positive = CMP(SLJIT_SIG_LESS_EQUAL, TMP2, 0, SLJIT_IMM, 0);
- OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, TMP1, 0);
- if (sljit_get_register_index (TMP3) >= 0)
-   {
-   /* Go through TMP1 and TMP3; TMP1 is reloaded with the local base after use. */
-   OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), -(2 * sizeof(sljit_sw)));
-   OP1(SLJIT_MOV, TMP3, 0, SLJIT_MEM1(STACK_TOP), -(3 * sizeof(sljit_sw)));
-   OP2(SLJIT_SUB, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, 3 * sizeof(sljit_sw));
-   OP1(SLJIT_MOV, SLJIT_MEM1(TMP2), 0, TMP1, 0);
-   GET_LOCAL_BASE(TMP1, 0, 0);
-   OP1(SLJIT_MOV, SLJIT_MEM1(TMP2), sizeof(sljit_sw), TMP3, 0);
-   }
- else
-   {
-   /* Copy memory to memory directly. */
-   OP1(SLJIT_MOV, SLJIT_MEM1(TMP2), 0, SLJIT_MEM1(STACK_TOP), -(2 * sizeof(sljit_sw)));
-   OP1(SLJIT_MOV, SLJIT_MEM1(TMP2), sizeof(sljit_sw), SLJIT_MEM1(STACK_TOP), -(3 * sizeof(sljit_sw)));
-   OP2(SLJIT_SUB, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, 3 * sizeof(sljit_sw));
-   }
- JUMPTO(SLJIT_JUMP, next_entry);
- JUMPHERE(not_positive);
- is_negative = CMP(SLJIT_NOT_ZERO /* SIG_LESS */, TMP2, 0, SLJIT_IMM, 0);
- /* Zero word: nothing more to revert. */
- sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
- JUMPHERE(is_negative);
- OP1(SLJIT_NEG, TMP2, 0, TMP2, 0);
- OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, TMP1, 0);
- if (sljit_get_register_index (TMP3) >= 0)
-   {
-   OP1(SLJIT_MOV, TMP3, 0, SLJIT_MEM1(STACK_TOP), -(2 * sizeof(sljit_sw)));
-   OP2(SLJIT_SUB, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, 2 * sizeof(sljit_sw));
-   OP1(SLJIT_MOV, SLJIT_MEM1(TMP2), 0, TMP3, 0);
-   }
- else
-   {
-   OP1(SLJIT_MOV, SLJIT_MEM1(TMP2), 0, SLJIT_MEM1(STACK_TOP), -(2 * sizeof(sljit_sw)));
-   OP2(SLJIT_SUB, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, 2 * sizeof(sljit_sw));
-   }
- JUMPTO(SLJIT_JUMP, next_entry);
- }
- static void check_wordboundary(compiler_common *common)
- { /* Emits a fast-call routine (return address kept in LOCALS0) that compares
-      the "word character" status of the character before STR_PTR with that
-      of the character at STR_PTR. The status of the previous character goes
-      to LOCALS1 (0 when STR_PTR is at or before jit_arguments.begin), that of
-      the current character to TMP2. The final XOR leaves the zero flag set
-      when both are equal, i.e. when there is no boundary. */
- DEFINE_COMPILER;
- struct sljit_jump *skipread;
- jump_list *skipread_list = NULL;
- #if PCRE2_CODE_UNIT_WIDTH != 8 || defined SUPPORT_UNICODE
- struct sljit_jump *jump;
- #endif
- SLJIT_COMPILE_ASSERT(ctype_word == 0x10, ctype_word_must_be_16);
- sljit_emit_fast_enter(compiler, SLJIT_MEM1(SLJIT_SP), LOCALS0);
- /* Get type of the previous char, and put it to LOCALS1. */
- OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
- OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, begin));
- OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), LOCALS1, SLJIT_IMM, 0); /* Default: previous character is not a word character. */
- skipread = CMP(SLJIT_LESS_EQUAL, STR_PTR, 0, TMP1, 0); /* Nothing precedes STR_PTR: keep the default. */
- skip_char_back(common);
- check_start_used_ptr(common);
- read_char(common); /* NOTE(review): presumably re-reads the previous character into TMP1 and restores STR_PTR - confirm. */
- /* Testing char type. */
- #ifdef SUPPORT_UNICODE
- if (common->use_ucp)
- {
- OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, 1);
- jump = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_UNDERSCORE); /* Underscore counts as a word character (TMP2 stays 1). */
- add_jump(compiler, &common->getucd, JUMP(SLJIT_FAST_CALL)); /* NOTE(review): presumably replaces TMP1 by the character's UCD type - confirm getucd. */
- OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ucp_Ll);
- OP2(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ucp_Lu - ucp_Ll);
- OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_LESS_EQUAL); /* TMP2 = type in [ucp_Ll, ucp_Lu]. */
- OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ucp_Nd - ucp_Ll);
- OP2(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ucp_No - ucp_Nd);
- OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_LESS_EQUAL); /* TMP2 |= type in [ucp_Nd, ucp_No]. */
- JUMPHERE(jump);
- OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), LOCALS1, TMP2, 0);
- }
- else
- #endif
- {
- #if PCRE2_CODE_UNIT_WIDTH != 8
- jump = CMP(SLJIT_GREATER, TMP1, 0, SLJIT_IMM, 255); /* Outside the ctypes table: LOCALS1 keeps its 0. */
- #elif defined SUPPORT_UNICODE
- /* Here LOCALS1 has already been zeroed. */
- jump = NULL;
- if (common->utf)
- jump = CMP(SLJIT_GREATER, TMP1, 0, SLJIT_IMM, 255);
- #endif /* PCRE2_CODE_UNIT_WIDTH == 8 */
- OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(TMP1), common->ctypes);
- OP2(SLJIT_LSHR, TMP1, 0, TMP1, 0, SLJIT_IMM, 4 /* ctype_word */);
- OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 1); /* Isolate the ctype_word (0x10) bit as 0/1. */
- OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), LOCALS1, TMP1, 0);
- #if PCRE2_CODE_UNIT_WIDTH != 8
- JUMPHERE(jump);
- #elif defined SUPPORT_UNICODE
- if (jump != NULL)
- JUMPHERE(jump);
- #endif /* PCRE2_CODE_UNIT_WIDTH == 8 */
- }
- JUMPHERE(skipread);
- OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, 0); /* Default for the current character: not a word character. */
- check_str_end(common, &skipread_list); /* NOTE(review): presumably adds a jump taken at end of subject - confirm. */
- peek_char(common, READ_CHAR_MAX);
- /* Testing char type. This is a code duplication. */
- #ifdef SUPPORT_UNICODE
- if (common->use_ucp)
- {
- OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, 1);
- jump = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_UNDERSCORE);
- add_jump(compiler, &common->getucd, JUMP(SLJIT_FAST_CALL));
- OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ucp_Ll);
- OP2(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ucp_Lu - ucp_Ll);
- OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_LESS_EQUAL);
- OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ucp_Nd - ucp_Ll);
- OP2(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ucp_No - ucp_Nd);
- OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_LESS_EQUAL);
- JUMPHERE(jump);
- }
- else
- #endif
- {
- #if PCRE2_CODE_UNIT_WIDTH != 8
- /* TMP2 may be destroyed by peek_char. */
- OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, 0);
- jump = CMP(SLJIT_GREATER, TMP1, 0, SLJIT_IMM, 255);
- #elif defined SUPPORT_UNICODE
- OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, 0);
- jump = NULL;
- if (common->utf)
- jump = CMP(SLJIT_GREATER, TMP1, 0, SLJIT_IMM, 255);
- #endif
- OP1(SLJIT_MOV_U8, TMP2, 0, SLJIT_MEM1(TMP1), common->ctypes);
- OP2(SLJIT_LSHR, TMP2, 0, TMP2, 0, SLJIT_IMM, 4 /* ctype_word */);
- OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 1);
- #if PCRE2_CODE_UNIT_WIDTH != 8
- JUMPHERE(jump);
- #elif defined SUPPORT_UNICODE
- if (jump != NULL)
- JUMPHERE(jump);
- #endif /* PCRE2_CODE_UNIT_WIDTH == 8 */
- }
- set_jumps(skipread_list, LABEL());
- OP2(SLJIT_XOR | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP2, 0, SLJIT_MEM1(SLJIT_SP), LOCALS1); /* Zero flag set <=> both sides have the same status. */
- sljit_emit_fast_return(compiler, SLJIT_MEM1(SLJIT_SP), LOCALS0);
- }
- static BOOL optimize_class_ranges(compiler_common *common, const sljit_u8 *bits, BOOL nclass, BOOL invert, jump_list **backtracks)
- {
- /* May destroy TMP1.
-    Tries to replace a 256-bit class bitmap by a few range comparisons on
-    TMP1. The bitmap is scanned for positions where the bit value flips;
-    each flip position is recorded in ranges[]. If the value of the last run
-    differs from what applies above 255 (given by nclass), 256 is added as a
-    final flip. With at most four flips, compare-and-jump code is emitted and
-    the jumps (taken for characters that are not accepted, after applying
-    invert) are appended to *backtracks; TRUE is returned. Otherwise nothing
-    is emitted and FALSE is returned. */
- DEFINE_COMPILER;
- int ranges[MAX_CLASS_RANGE_SIZE];
- sljit_u8 bit, cbit, all;
- int i, byte, length = 0;
- bit = bits[0] & 0x1;
- /* All bits will be zero or one (since bit is zero or one). */
- all = -bit;
- for (i = 0; i < 256; )
- {
- byte = i >> 3;
- if ((i & 0x7) == 0 && bits[byte] == all)
- i += 8; /* Whole byte continues the current run: skip it at once. */
- else
- {
- cbit = (bits[byte] >> (i & 0x7)) & 0x1;
- if (cbit != bit)
- {
- if (length >= MAX_CLASS_RANGE_SIZE)
- return FALSE;
- ranges[length] = i; /* Record the position where the bit value flips. */
- length++;
- bit = cbit;
- all = -cbit;
- }
- i++;
- }
- }
- if (((bit == 0) && nclass) || ((bit == 1) && !nclass))
- { /* The last run disagrees with the status of characters >= 256: flip at 256. */
- if (length >= MAX_CLASS_RANGE_SIZE)
- return FALSE;
- ranges[length] = 256;
- length++;
- }
- if (length < 0 || length > 4)
- return FALSE;
- bit = bits[0] & 0x1; /* From here on, bit is the (possibly inverted) status of character 0. */
- if (invert) bit ^= 0x1;
- /* No character is accepted. */
- if (length == 0 && bit == 0)
- add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));
- switch(length)
- {
- case 0:
- /* When bit != 0, all characters are accepted. */
- return TRUE;
- case 1:
- add_jump(compiler, backtracks, CMP(bit == 0 ? SLJIT_LESS : SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, ranges[0])); /* One flip: a single threshold test. */
- return TRUE;
- case 2:
- if (ranges[0] + 1 != ranges[1])
- {
- OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ranges[0]); /* Shift so [ranges[0], ranges[1]) maps to [0, width) for one unsigned compare. */
- add_jump(compiler, backtracks, CMP(bit != 0 ? SLJIT_LESS : SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, ranges[1] - ranges[0]));
- }
- else
- add_jump(compiler, backtracks, CMP(bit != 0 ? SLJIT_EQUAL : SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, ranges[0])); /* Range of width one: plain equality test. */
- return TRUE;
- case 3:
- if (bit != 0)
- { /* Accepted: below ranges[0], or in [ranges[1], ranges[2]). */
- add_jump(compiler, backtracks, CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, ranges[2]));
- if (ranges[0] + 1 != ranges[1])
- {
- OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ranges[0]);
- add_jump(compiler, backtracks, CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, ranges[1] - ranges[0]));
- }
- else
- add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, ranges[0]));
- return TRUE;
- }
- add_jump(compiler, backtracks, CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, ranges[0])); /* bit == 0: accepted are [ranges[0], ranges[1]) and everything from ranges[2]. */
- if (ranges[1] + 1 != ranges[2])
- {
- OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ranges[1]);
- add_jump(compiler, backtracks, CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, ranges[2] - ranges[1]));
- }
- else
- add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, ranges[1]));
- return TRUE;
- case 4:
- if ((ranges[1] - ranges[0]) == (ranges[3] - ranges[2])
- && (ranges[0] | (ranges[2] - ranges[0])) == ranges[2]
- && (ranges[1] & (ranges[2] - ranges[0])) == 0
- && is_powerof2(ranges[2] - ranges[0]))
- { /* Two equally wide ranges that differ in exactly one bit: OR that bit in and test the upper range only. */
- SLJIT_ASSERT((ranges[0] & (ranges[2] - ranges[0])) == 0 && (ranges[2] & ranges[3] & (ranges[2] - ranges[0])) != 0);
- OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, ranges[2] - ranges[0]);
- if (ranges[2] + 1 != ranges[3])
- {
- OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ranges[2]);
- add_jump(compiler, backtracks, CMP(bit != 0 ? SLJIT_LESS : SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, ranges[3] - ranges[2]));
- }
- else
- add_jump(compiler, backtracks, CMP(bit != 0 ? SLJIT_EQUAL : SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, ranges[2]));
- return TRUE;
- }
- if (bit != 0)
- {
- i = 0; /* i tracks how much has already been subtracted from TMP1. */
- if (ranges[0] + 1 != ranges[1])
- {
- OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ranges[0]);
- add_jump(compiler, backtracks, CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, ranges[1] - ranges[0]));
- i = ranges[0];
- }
- else
- add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, ranges[0]));
- if (ranges[2] + 1 != ranges[3])
- {
- OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ranges[2] - i);
- add_jump(compiler, backtracks, CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, ranges[3] - ranges[2]));
- }
- else
- add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, ranges[2] - i));
- return TRUE;
- }
- OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ranges[0]); /* bit == 0: first reject everything outside [ranges[0], ranges[3]), then the gap [ranges[1], ranges[2]). */
- add_jump(compiler, backtracks, CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, ranges[3] - ranges[0]));
- if (ranges[1] + 1 != ranges[2])
- {
- OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ranges[1] - ranges[0]);
- add_jump(compiler, backtracks, CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, ranges[2] - ranges[1]));
- }
- else
- add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, ranges[1] - ranges[0]));
- return TRUE;
- default:
- SLJIT_UNREACHABLE();
- return FALSE;
- }
- }
- static BOOL optimize_class_chars(compiler_common *common, const sljit_u8 *bits, BOOL nclass, BOOL invert, jump_list **backtracks)
- {
- /* May destroy TMP1; TMP2 is used as the result accumulator.
-    Tries to replace a 256-bit class bitmap by a short list of equality
-    tests combined with conditional moves. The listed characters are the set
-    bits of the bitmap (the clear bits when nclass, after applying invert, is
-    true). Two characters that differ only in bit 0x20 share one list entry,
-    marked with 0x100, and are tested together after OR-ing 0x20 into TMP1.
-    Returns FALSE without emitting code when CMOV is not available, when
-    more than MAX_CLASS_CHARS_SIZE entries would be needed, or when the list
-    is empty. Otherwise emits the tests, appends one jump to *backtracks and
-    returns TRUE. */
- DEFINE_COMPILER;
- uint16_t char_list[MAX_CLASS_CHARS_SIZE];
- uint8_t byte;
- sljit_s32 type;
- int i, j, k, len, c;
- if (!sljit_has_cpu_feature(SLJIT_HAS_CMOV))
-   return FALSE;
- if (invert)
-   nclass = !nclass;
- len = 0;
- for (i = 0; i < 32; i++)
-   {
-   byte = bits[i];
-   if (nclass)
-     byte = ~byte;
-   j = 0;
-   while (byte != 0)
-     {
-     if (byte & 0x1)
-       {
-       c = i * 8 + j;
-       k = len;
-       if ((c & 0x20) != 0)
-         {
-         /* Look for the partner without bit 0x20 and merge with it. */
-         for (k = 0; k < len; k++)
-           if (char_list[k] == c - 0x20)
-             {
-             char_list[k] |= 0x120;
-             break;
-             }
-         }
-       if (k == len)
-         {
-         if (len >= MAX_CLASS_CHARS_SIZE)
-           return FALSE;
-         char_list[len++] = (uint16_t) c;
-         }
-       }
-     byte >>= 1;
-     j++;
-     }
-   }
- /* An empty list leaves char_list[] uninitialized, so it must not be read
-    below. Nothing has been emitted yet; let the caller use another method. */
- if (len == 0)
-   return FALSE;
- i = 0;
- j = 0;
- if (char_list[0] == 0)
-   {
-   /* Character 0 cannot be signalled by moving TMP1 (zero) into TMP2, so
-      its comparison result is stored directly. */
-   i++;
-   OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0);
-   OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_ZERO);
-   }
- else
-   OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, 0);
- /* Unpaired entries: TMP2 receives TMP1 on equality. */
- while (i < len)
-   {
-   if ((char_list[i] & 0x100) != 0)
-     j++;
-   else
-     {
-     OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, char_list[i]);
-     CMOV(SLJIT_ZERO, TMP2, TMP1, 0);
-     }
-   i++;
-   }
- /* Paired entries: fold both variants onto the one with bit 0x20 set. */
- if (j != 0)
-   {
-   OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x20);
-   for (i = 0; i < len; i++)
-     if ((char_list[i] & 0x100) != 0)
-       {
-       j--;
-       OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, char_list[i] & 0xff);
-       CMOV(SLJIT_ZERO, TMP2, TMP1, 0);
-       }
-   }
- /* TMP2 != 0 means TMP1 matched a list entry. */
- type = nclass ? SLJIT_NOT_EQUAL : SLJIT_EQUAL;
- add_jump(compiler, backtracks, CMP(type, TMP2, 0, SLJIT_IMM, 0));
- return TRUE;
- }
- static BOOL optimize_class(compiler_common *common, const sljit_u8 *bits, BOOL nclass, BOOL invert, jump_list **backtracks)
- {
- /* May destroy TMP1.
-    Tries the range based generator first and falls back to the character
-    list generator; returns TRUE as soon as one of them has emitted code. */
- return optimize_class_ranges(common, bits, nclass, invert, backtracks)
-   ? TRUE
-   : optimize_class_chars(common, bits, nclass, invert, backtracks);
- }
- static void check_anynewline(compiler_common *common)
- {
- /* Check whether TMP1 contains a newline character. TMP2 destroyed.
-    Emitted as a fast-call routine returning through RETURN_ADDR. TMP1 is
-    modified as well. The last OP_FLAGS leaves the combined result in TMP2
-    and in the zero flag (TMP2 non-zero when one of the tests matched). */
- DEFINE_COMPILER;
- sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
- OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x0a);
- OP2(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x0d - 0x0a);
- OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_LESS_EQUAL); /* TMP2 = character in 0x0a..0x0d. */
- OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x85 - 0x0a); /* Test for 0x85; result is merged by the next OP_FLAGS. */
- #if defined SUPPORT_UNICODE || PCRE2_CODE_UNIT_WIDTH == 16 || PCRE2_CODE_UNIT_WIDTH == 32
- #if PCRE2_CODE_UNIT_WIDTH == 8
- if (common->utf)
- {
- #endif
- OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_EQUAL);
- OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x1); /* Fold 0x2028 onto 0x2029 so one compare covers both. */
- OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x2029 - 0x0a);
- #if PCRE2_CODE_UNIT_WIDTH == 8
- }
- #endif
- #endif /* SUPPORT_UNICODE || PCRE2_CODE_UNIT_WIDTH == [16|32] */
- OP_FLAGS(SLJIT_OR | SLJIT_SET_Z, TMP2, 0, SLJIT_EQUAL);
- sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
- }
- static void check_hspace(compiler_common *common)
- {
- /* Check whether TMP1 contains a horizontal space character. TMP2 destroyed.
-    Emitted as a fast-call routine returning through RETURN_ADDR. TMP1 is
-    modified on the wide/UTF path. Tested values: 0x09, 0x20, 0xa0 and, in
-    UTF mode or with 16/32 bit code units, 0x1680, 0x180e, 0x2000..0x200A,
-    0x202f, 0x205f and 0x3000. The last OP_FLAGS leaves the combined result
-    in TMP2 and in the zero flag (TMP2 non-zero when one of them matched). */
- DEFINE_COMPILER;
- sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
- OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x09);
- OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_EQUAL);
- OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x20);
- OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_EQUAL);
- OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0xa0); /* Result is merged by the next OP_FLAGS. */
- #if defined SUPPORT_UNICODE || PCRE2_CODE_UNIT_WIDTH == 16 || PCRE2_CODE_UNIT_WIDTH == 32
- #if PCRE2_CODE_UNIT_WIDTH == 8
- if (common->utf)
- {
- #endif
- OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_EQUAL);
- OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x1680);
- OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_EQUAL);
- OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x180e);
- OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_EQUAL);
- OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x2000); /* Rebase TMP1; the remaining constants are relative to 0x2000. */
- OP2(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x200A - 0x2000);
- OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_LESS_EQUAL);
- OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x202f - 0x2000);
- OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_EQUAL);
- OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x205f - 0x2000);
- OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_EQUAL);
- OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x3000 - 0x2000);
- #if PCRE2_CODE_UNIT_WIDTH == 8
- }
- #endif
- #endif /* SUPPORT_UNICODE || PCRE2_CODE_UNIT_WIDTH == [16|32] */
- OP_FLAGS(SLJIT_OR | SLJIT_SET_Z, TMP2, 0, SLJIT_EQUAL);
- sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
- }
- static void check_vspace(compiler_common *common)
- {
- /* Check whether TMP1 contains a vertical space character. TMP2 destroyed.
-    Emitted as a fast-call routine returning through RETURN_ADDR. TMP1 is
-    modified as well. Tested values: 0x0a..0x0d, 0x85 and, in UTF mode or
-    with 16/32 bit code units, 0x2028 and 0x2029. The emitted sequence is
-    the same as the one in check_anynewline(). */
- DEFINE_COMPILER;
- sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
- OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x0a);
- OP2(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x0d - 0x0a);
- OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_LESS_EQUAL); /* TMP2 = character in 0x0a..0x0d. */
- OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x85 - 0x0a); /* Test for 0x85; result is merged by the next OP_FLAGS. */
- #if defined SUPPORT_UNICODE || PCRE2_CODE_UNIT_WIDTH == 16 || PCRE2_CODE_UNIT_WIDTH == 32
- #if PCRE2_CODE_UNIT_WIDTH == 8
- if (common->utf)
- {
- #endif
- OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_EQUAL);
- OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x1); /* Fold 0x2028 onto 0x2029 so one compare covers both. */
- OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x2029 - 0x0a);
- #if PCRE2_CODE_UNIT_WIDTH == 8
- }
- #endif
- #endif /* SUPPORT_UNICODE || PCRE2_CODE_UNIT_WIDTH == [16|32] */
- OP_FLAGS(SLJIT_OR | SLJIT_SET_Z, TMP2, 0, SLJIT_EQUAL);
- sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
- }
- static void do_casefulcmp(compiler_common *common)
- { /* Emits a fast-call routine (return address kept in LOCALS0) that compares
-      code units read through TMP1 with code units read through STR_PTR, one
-      unit per iteration. STR_PTR is first moved back by TMP2, and TMP2 is
-      then counted down by one unit per iteration; the loop ends when TMP2
-      reaches zero or the units differ. One of three loop shapes is emitted,
-      depending on which sljit_emit_mem() addressing form is reported as
-      supported. */
- DEFINE_COMPILER;
- struct sljit_jump *jump;
- struct sljit_label *label;
- int char1_reg;
- int char2_reg;
- if (sljit_get_register_index(TMP3) < 0)
- { /* TMP3 has no register index: borrow STR_END and STACK_TOP for the two characters. */
- char1_reg = STR_END;
- char2_reg = STACK_TOP;
- }
- else
- {
- char1_reg = TMP3;
- char2_reg = RETURN_ADDR;
- }
- sljit_emit_fast_enter(compiler, SLJIT_MEM1(SLJIT_SP), LOCALS0);
- OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
- if (char1_reg == STR_END)
- { /* Save the borrowed registers; they are restored before returning. */
- OP1(SLJIT_MOV, TMP3, 0, char1_reg, 0);
- OP1(SLJIT_MOV, RETURN_ADDR, 0, char2_reg, 0);
- }
- if (sljit_emit_mem(compiler, MOV_UCHAR | SLJIT_MEM_SUPP | SLJIT_MEM_POST, char1_reg, SLJIT_MEM1(TMP1), IN_UCHARS(1)) == SLJIT_SUCCESS)
- { /* Variant 1: loads with post-update of the address register. */
- label = LABEL();
- sljit_emit_mem(compiler, MOV_UCHAR | SLJIT_MEM_POST, char1_reg, SLJIT_MEM1(TMP1), IN_UCHARS(1));
- sljit_emit_mem(compiler, MOV_UCHAR | SLJIT_MEM_POST, char2_reg, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
- jump = CMP(SLJIT_NOT_EQUAL, char1_reg, 0, char2_reg, 0);
- OP2(SLJIT_SUB | SLJIT_SET_Z, TMP2, 0, TMP2, 0, SLJIT_IMM, IN_UCHARS(1));
- JUMPTO(SLJIT_NOT_ZERO, label);
- JUMPHERE(jump);
- OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), LOCALS0); /* TMP1 now holds the return address. */
- }
- else if (sljit_emit_mem(compiler, MOV_UCHAR | SLJIT_MEM_SUPP | SLJIT_MEM_PRE, char1_reg, SLJIT_MEM1(TMP1), IN_UCHARS(1)) == SLJIT_SUCCESS)
- { /* Variant 2: loads with pre-update; both pointers start one unit early. */
- OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, IN_UCHARS(1));
- OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
- label = LABEL();
- sljit_emit_mem(compiler, MOV_UCHAR | SLJIT_MEM_PRE, char1_reg, SLJIT_MEM1(TMP1), IN_UCHARS(1));
- sljit_emit_mem(compiler, MOV_UCHAR | SLJIT_MEM_PRE, char2_reg, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
- jump = CMP(SLJIT_NOT_EQUAL, char1_reg, 0, char2_reg, 0);
- OP2(SLJIT_SUB | SLJIT_SET_Z, TMP2, 0, TMP2, 0, SLJIT_IMM, IN_UCHARS(1));
- JUMPTO(SLJIT_NOT_ZERO, label);
- JUMPHERE(jump);
- OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), LOCALS0);
- OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1)); /* Compensate for the early start of STR_PTR. */
- }
- else
- { /* Variant 3: plain loads followed by explicit pointer increments. */
- label = LABEL();
- OP1(MOV_UCHAR, char1_reg, 0, SLJIT_MEM1(TMP1), 0);
- OP1(MOV_UCHAR, char2_reg, 0, SLJIT_MEM1(STR_PTR), 0);
- OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, IN_UCHARS(1));
- OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
- jump = CMP(SLJIT_NOT_EQUAL, char1_reg, 0, char2_reg, 0);
- OP2(SLJIT_SUB | SLJIT_SET_Z, TMP2, 0, TMP2, 0, SLJIT_IMM, IN_UCHARS(1));
- JUMPTO(SLJIT_NOT_ZERO, label);
- JUMPHERE(jump);
- OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), LOCALS0);
- }
- if (char1_reg == STR_END)
- {
- OP1(SLJIT_MOV, char1_reg, 0, TMP3, 0);
- OP1(SLJIT_MOV, char2_reg, 0, RETURN_ADDR, 0);
- }
- sljit_emit_fast_return(compiler, TMP1, 0);
- }
- static void do_caselesscmp(compiler_common *common)
- { /* Emits a fast-call routine (return address kept in LOCALS0) that compares
-      code units read through TMP1 with code units read through STR_PTR
-      after mapping each unit through the common->lcc table. With code unit
-      widths other than 8, units above 255 are compared unmapped. STR_PTR is
-      first moved back by TMP2, and TMP2 is counted down by one unit per
-      iteration; the loop ends when TMP2 reaches zero or the mapped units
-      differ. STR_END serves as a scratch register and is saved in LOCALS1. */
- DEFINE_COMPILER;
- struct sljit_jump *jump;
- struct sljit_label *label;
- int char1_reg = STR_END;
- int char2_reg;
- int lcc_table;
- int opt_type = 0;
- if (sljit_get_register_index(TMP3) < 0)
- { /* TMP3 has no register index: borrow STACK_TOP and STACK_LIMIT instead. */
- char2_reg = STACK_TOP;
- lcc_table = STACK_LIMIT;
- }
- else
- {
- char2_reg = RETURN_ADDR;
- lcc_table = TMP3;
- }
- if (sljit_emit_mem(compiler, MOV_UCHAR | SLJIT_MEM_SUPP | SLJIT_MEM_POST, char1_reg, SLJIT_MEM1(TMP1), IN_UCHARS(1)) == SLJIT_SUCCESS)
- opt_type = 1; /* Post-update loads can be used. */
- else if (sljit_emit_mem(compiler, MOV_UCHAR | SLJIT_MEM_SUPP | SLJIT_MEM_PRE, char1_reg, SLJIT_MEM1(TMP1), IN_UCHARS(1)) == SLJIT_SUCCESS)
- opt_type = 2; /* Pre-update loads can be used. */
- sljit_emit_fast_enter(compiler, SLJIT_MEM1(SLJIT_SP), LOCALS0);
- OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
- OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), LOCALS1, char1_reg, 0); /* Save STR_END; it is restored before returning. */
- if (char2_reg == STACK_TOP)
- { /* Save the borrowed registers in TMP3 and RETURN_ADDR. */
- OP1(SLJIT_MOV, TMP3, 0, char2_reg, 0);
- OP1(SLJIT_MOV, RETURN_ADDR, 0, lcc_table, 0);
- }
- OP1(SLJIT_MOV, lcc_table, 0, SLJIT_IMM, common->lcc);
- if (opt_type == 1)
- {
- label = LABEL();
- sljit_emit_mem(compiler, MOV_UCHAR | SLJIT_MEM_POST, char1_reg, SLJIT_MEM1(TMP1), IN_UCHARS(1));
- sljit_emit_mem(compiler, MOV_UCHAR | SLJIT_MEM_POST, char2_reg, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
- }
- else if (opt_type == 2)
- { /* Pre-update form: both pointers start one unit early. */
- OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, IN_UCHARS(1));
- OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
- label = LABEL();
- sljit_emit_mem(compiler, MOV_UCHAR | SLJIT_MEM_PRE, char1_reg, SLJIT_MEM1(TMP1), IN_UCHARS(1));
- sljit_emit_mem(compiler, MOV_UCHAR | SLJIT_MEM_PRE, char2_reg, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
- }
- else
- {
- label = LABEL();
- OP1(MOV_UCHAR, char1_reg, 0, SLJIT_MEM1(TMP1), 0);
- OP1(MOV_UCHAR, char2_reg, 0, SLJIT_MEM1(STR_PTR), 0);
- OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, IN_UCHARS(1)); /* STR_PTR is advanced later, after the table lookups. */
- }
- #if PCRE2_CODE_UNIT_WIDTH != 8
- jump = CMP(SLJIT_GREATER, char1_reg, 0, SLJIT_IMM, 255); /* Units above 255 skip the table lookup. */
- #endif
- OP1(SLJIT_MOV_U8, char1_reg, 0, SLJIT_MEM2(lcc_table, char1_reg), 0);
- #if PCRE2_CODE_UNIT_WIDTH != 8
- JUMPHERE(jump);
- jump = CMP(SLJIT_GREATER, char2_reg, 0, SLJIT_IMM, 255);
- #endif
- OP1(SLJIT_MOV_U8, char2_reg, 0, SLJIT_MEM2(lcc_table, char2_reg), 0);
- #if PCRE2_CODE_UNIT_WIDTH != 8
- JUMPHERE(jump);
- #endif
- if (opt_type == 0)
- OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
- jump = CMP(SLJIT_NOT_EQUAL, char1_reg, 0, char2_reg, 0);
- OP2(SLJIT_SUB | SLJIT_SET_Z, TMP2, 0, TMP2, 0, SLJIT_IMM, IN_UCHARS(1));
- JUMPTO(SLJIT_NOT_ZERO, label);
- JUMPHERE(jump);
- OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), LOCALS0); /* TMP1 now holds the return address. */
- if (opt_type == 2)
- OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1)); /* Compensate for the early start of STR_PTR. */
- if (char2_reg == STACK_TOP)
- {
- OP1(SLJIT_MOV, char2_reg, 0, TMP3, 0);
- OP1(SLJIT_MOV, lcc_table, 0, RETURN_ADDR, 0);
- }
- OP1(SLJIT_MOV, char1_reg, 0, SLJIT_MEM1(SLJIT_SP), LOCALS1);
- sljit_emit_fast_return(compiler, TMP1, 0);
- }
- #if defined SUPPORT_UNICODE
- static PCRE2_SPTR SLJIT_FUNC do_utf_caselesscmp(PCRE2_SPTR src1, PCRE2_SPTR src2, PCRE2_SPTR end1, PCRE2_SPTR end2)
- {
- /* Caseless comparison of two character sequences, written in plain C
-    because emitting it as JIT code would not pay off.
-    Returns src2 advanced past the compared characters when every character
-    of [src1, end1) has a caseless match, (PCRE2_SPTR)1 when src2 reaches
-    end2 while src1 still has characters, and NULL at the first character
-    that has no caseless match. */
- sljit_u32 lhs, rhs;
- const ucd_record *record;
- const sljit_u32 *candidate;
- for (;;)
- {
- if (src1 >= end1)
- {
- return src2;
- }
- if (src2 >= end2)
- {
- return (PCRE2_SPTR)1;
- }
- GETCHARINC(lhs, src1);
- GETCHARINC(rhs, src2);
- record = GET_UCD(rhs);
- if (lhs == rhs || lhs == rhs + record->other_case)
- {
- continue;
- }
- /* Neither identical nor the simple other case: look lhs up in the
-    caseless set of rhs. The scan gives up as soon as an entry is larger
-    than lhs. */
- candidate = PRIV(ucd_caseless_sets) + record->caseset;
- while (lhs != *candidate)
- {
- if (lhs < *candidate)
- {
- return NULL;
- }
- candidate++;
- }
- }
- }
- #endif /* SUPPORT_UNICODE */
- static PCRE2_SPTR byte_sequence_compare(compiler_common *common, BOOL caseless, PCRE2_SPTR cc,
- compare_context *context, jump_list **backtracks)
- { /* Emits the comparison of one literal character of the pattern (every
-    code unit of it when common->utf is set and the lead unit has extra
-    units) against the subject, and returns cc advanced past that character.
-    The subject units are addressed as STR_PTR - context->length, and
-    context->length is reduced by IN_UCHARS(1) per code unit. A mismatch
-    jumps to the backtracks list. For a caseless character that
-    char_has_othercase() accepts, the differing bit is OR-ed into both the
-    loaded value and the expected constant instead of comparing twice.
-    context->sourcereg == -1 marks the first call for a sequence: the first
-    chunk is preloaded into TMP1 here. Afterwards each step loads the next
-    chunk into one of TMP1/TMP2 and compares the other one. */
- DEFINE_COMPILER;
- unsigned int othercasebit = 0;
- PCRE2_SPTR othercasechar = NULL;
- #ifdef SUPPORT_UNICODE
- int utflength;
- #endif
- if (caseless && char_has_othercase(common, cc))
- {
- othercasebit = char_get_othercase_bit(common, cc);
- SLJIT_ASSERT(othercasebit);
- /* Extracting bit difference info. The upper part of the value selects
-    the code unit (relative to cc) whose cases differ, the low 8 bits are
-    the bit mask to OR in. */
- #if PCRE2_CODE_UNIT_WIDTH == 8
- othercasechar = cc + (othercasebit >> 8);
- othercasebit &= 0xff;
- #elif PCRE2_CODE_UNIT_WIDTH == 16 || PCRE2_CODE_UNIT_WIDTH == 32
- /* Note that this code only handles characters in the BMP. If there
- ever are characters outside the BMP whose othercase differs in only one
- bit from itself (there currently are none), this code will need to be
- revised for PCRE2_CODE_UNIT_WIDTH == 32. */
- othercasechar = cc + (othercasebit >> 9);
- if ((othercasebit & 0x100) != 0) /* flag 0x100: the mask applies to the high byte of the unit */
- othercasebit = (othercasebit & 0xff) << 8;
- else
- othercasebit &= 0xff;
- #endif /* PCRE2_CODE_UNIT_WIDTH == [8|16|32] */
- }
- if (context->sourcereg == -1)
- { /* First call: preload the widest chunk that still fits into TMP1. */
- #if PCRE2_CODE_UNIT_WIDTH == 8
- #if defined SLJIT_UNALIGNED && SLJIT_UNALIGNED
- if (context->length >= 4)
- OP1(SLJIT_MOV_S32, TMP1, 0, SLJIT_MEM1(STR_PTR), -context->length);
- else if (context->length >= 2)
- OP1(SLJIT_MOV_U16, TMP1, 0, SLJIT_MEM1(STR_PTR), -context->length);
- else
- #endif
- OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(STR_PTR), -context->length);
- #elif PCRE2_CODE_UNIT_WIDTH == 16
- #if defined SLJIT_UNALIGNED && SLJIT_UNALIGNED
- if (context->length >= 4)
- OP1(SLJIT_MOV_S32, TMP1, 0, SLJIT_MEM1(STR_PTR), -context->length);
- else
- #endif
- OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), -context->length);
- #elif PCRE2_CODE_UNIT_WIDTH == 32
- OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), -context->length);
- #endif /* PCRE2_CODE_UNIT_WIDTH == [8|16|32] */
- context->sourcereg = TMP2; /* the next load targets TMP2 while TMP1 is compared */
- }
- #ifdef SUPPORT_UNICODE
- utflength = 1;
- if (common->utf && HAS_EXTRALEN(*cc))
- utflength += GET_EXTRALEN(*cc);
- do
- {
- #endif
- context->length -= IN_UCHARS(1);
- #if (defined SLJIT_UNALIGNED && SLJIT_UNALIGNED) && (PCRE2_CODE_UNIT_WIDTH == 8 || PCRE2_CODE_UNIT_WIDTH == 16)
- /* Unaligned read is supported. Code units are collected in context->c
-    (expected value) and context->oc (case bits) and compared together
-    once a 4, 2 or 1 byte group is complete. */
- if (othercasebit != 0 && othercasechar == cc)
- {
- context->c.asuchars[context->ucharptr] = *cc | othercasebit;
- context->oc.asuchars[context->ucharptr] = othercasebit;
- }
- else
- {
- context->c.asuchars[context->ucharptr] = *cc;
- context->oc.asuchars[context->ucharptr] = 0;
- }
- context->ucharptr++;
- #if PCRE2_CODE_UNIT_WIDTH == 8
- if (context->ucharptr >= 4 || context->length == 0 || (context->ucharptr == 2 && context->length == 1))
- #else
- if (context->ucharptr >= 2 || context->length == 0)
- #endif
- { /* A group is complete: start loading the following group (if any)
-    into the idle register, then compare the register loaded earlier. */
- if (context->length >= 4)
- OP1(SLJIT_MOV_S32, context->sourcereg, 0, SLJIT_MEM1(STR_PTR), -context->length);
- else if (context->length >= 2)
- OP1(SLJIT_MOV_U16, context->sourcereg, 0, SLJIT_MEM1(STR_PTR), -context->length);
- #if PCRE2_CODE_UNIT_WIDTH == 8
- else if (context->length >= 1)
- OP1(SLJIT_MOV_U8, context->sourcereg, 0, SLJIT_MEM1(STR_PTR), -context->length);
- #endif /* PCRE2_CODE_UNIT_WIDTH == 8 */
- context->sourcereg = context->sourcereg == TMP1 ? TMP2 : TMP1;
- switch(context->ucharptr)
- {
- case 4 / sizeof(PCRE2_UCHAR):
- if (context->oc.asint != 0)
- OP2(SLJIT_OR, context->sourcereg, 0, context->sourcereg, 0, SLJIT_IMM, context->oc.asint);
- add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, context->sourcereg, 0, SLJIT_IMM, context->c.asint | context->oc.asint));
- break;
- case 2 / sizeof(PCRE2_UCHAR):
- if (context->oc.asushort != 0)
- OP2(SLJIT_OR, context->sourcereg, 0, context->sourcereg, 0, SLJIT_IMM, context->oc.asushort);
- add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, context->sourcereg, 0, SLJIT_IMM, context->c.asushort | context->oc.asushort));
- break;
- #if PCRE2_CODE_UNIT_WIDTH == 8
- case 1:
- if (context->oc.asbyte != 0)
- OP2(SLJIT_OR, context->sourcereg, 0, context->sourcereg, 0, SLJIT_IMM, context->oc.asbyte);
- add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, context->sourcereg, 0, SLJIT_IMM, context->c.asbyte | context->oc.asbyte));
- break;
- #endif
- default:
- SLJIT_UNREACHABLE();
- break;
- }
- context->ucharptr = 0;
- }
- #else
- /* Unaligned read is unsupported or in 32 bit mode. One code unit is
-    compared per step: the next unit is loaded into the idle register and
-    the previously loaded one is compared. */
- if (context->length >= 1)
- OP1(MOV_UCHAR, context->sourcereg, 0, SLJIT_MEM1(STR_PTR), -context->length);
- context->sourcereg = context->sourcereg == TMP1 ? TMP2 : TMP1;
- if (othercasebit != 0 && othercasechar == cc)
- {
- OP2(SLJIT_OR, context->sourcereg, 0, context->sourcereg, 0, SLJIT_IMM, othercasebit);
- add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, context->sourcereg, 0, SLJIT_IMM, *cc | othercasebit));
- }
- else
- add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, context->sourcereg, 0, SLJIT_IMM, *cc));
- #endif
- cc++;
- #ifdef SUPPORT_UNICODE
- utflength--;
- }
- while (utflength > 0);
- #endif
- return cc;
- }
- #if defined SUPPORT_UNICODE || PCRE2_CODE_UNIT_WIDTH != 8
- /* Offset helpers for compile_xclass_matchingpath. The generated code keeps
-    typereg and TMP1 biased by a compile-time offset (typeoffset and
-    charoffset, both locals of the function using the macro), so that a
-    range check needs a single unsigned compare.
-    SET_TYPE_OFFSET(value) emits one ADD or SUB that changes the bias of
-    typereg from typeoffset to value and records the new bias.
-    SET_CHAR_OFFSET(value) does the same for TMP1 and charoffset.
-    Nothing is emitted when the bias is already equal to value. The argument
-    is evaluated more than once, so it must be free of side effects. Both
-    macros expand to a single statement and must be followed by a semicolon. */
- #define SET_TYPE_OFFSET(value) \
- do \
- { \
- if ((value) != typeoffset) \
- { \
- if ((value) < typeoffset) \
- OP2(SLJIT_ADD, typereg, 0, typereg, 0, SLJIT_IMM, typeoffset - (value)); \
- else \
- OP2(SLJIT_SUB, typereg, 0, typereg, 0, SLJIT_IMM, (value) - typeoffset); \
- } \
- typeoffset = (value); \
- } \
- while (0)
- #define SET_CHAR_OFFSET(value) \
- do \
- { \
- if ((value) != charoffset) \
- { \
- if ((value) < charoffset) \
- OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(charoffset - (value))); \
- else \
- OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)((value) - charoffset)); \
- } \
- charoffset = (value); \
- } \
- while (0)
- static PCRE2_SPTR compile_char1_matchingpath(compiler_common *common, PCRE2_UCHAR type, PCRE2_SPTR cc, jump_list **backtracks, BOOL check_str_ptr);
- static void compile_xclass_matchingpath(compiler_common *common, PCRE2_SPTR cc, jump_list **backtracks)
- {
- /* Emits the matching-path code for an extended character class.
-    cc points at the flag unit (XCL_NOT, XCL_MAP, XCL_HASPROP). When XCL_MAP
-    is set a 32 byte bitmap follows, then the XCL_SINGLE / XCL_RANGE /
-    XCL_PROP / XCL_NOTPROP items up to XCL_END.
-    A first pass over the items collects the [min, max] character range and
-    which of the type, script and character values the compares need. The
-    subject character is then read with read_char_range() and one compare
-    sequence per item is emitted. For a non-negated class the successful
-    compares are collected in `found` and bound to the label at the end,
-    and the failing path goes to backtracks. For a negated class (XCL_NOT)
-    the successful compares go to backtracks directly. */
- DEFINE_COMPILER;
- jump_list *found = NULL;
- jump_list **list = (cc[0] & XCL_NOT) == 0 ? &found : backtracks;
- sljit_uw c, charoffset, max = 256, min = READ_CHAR_MAX;
- struct sljit_jump *jump = NULL;
- PCRE2_SPTR ccbegin;
- int compares, invertcmp, numberofcmps;
- #if defined SUPPORT_UNICODE && (PCRE2_CODE_UNIT_WIDTH == 8 || PCRE2_CODE_UNIT_WIDTH == 16)
- BOOL utf = common->utf;
- #endif
- #ifdef SUPPORT_UNICODE
- BOOL needstype = FALSE, needsscript = FALSE, needschar = FALSE;
- BOOL charsaved = FALSE;
- int typereg = TMP1;
- const sljit_u32 *other_cases;
- sljit_uw typeoffset;
- #endif
- /* Scanning the necessary info. */
- cc++;
- ccbegin = cc;
- compares = 0;
- if (cc[-1] & XCL_MAP)
- {
- min = 0;
- cc += 32 / sizeof(PCRE2_UCHAR);
- }
- while (*cc != XCL_END)
- {
- compares++;
- if (*cc == XCL_SINGLE)
- {
- cc ++;
- GETCHARINCTEST(c, cc);
- if (c > max) max = c;
- if (c < min) min = c;
- #ifdef SUPPORT_UNICODE
- needschar = TRUE;
- #endif
- }
- else if (*cc == XCL_RANGE)
- {
- cc ++;
- GETCHARINCTEST(c, cc);
- if (c < min) min = c;
- GETCHARINCTEST(c, cc);
- if (c > max) max = c;
- #ifdef SUPPORT_UNICODE
- needschar = TRUE;
- #endif
- }
- #ifdef SUPPORT_UNICODE
- else
- {
- SLJIT_ASSERT(*cc == XCL_PROP || *cc == XCL_NOTPROP);
- cc++;
- if (*cc == PT_CLIST)
- {
- other_cases = PRIV(ucd_caseless_sets) + cc[1];
- while (*other_cases != NOTACHAR)
- {
- if (*other_cases > max) max = *other_cases;
- if (*other_cases < min) min = *other_cases;
- other_cases++;
- }
- }
- else
- {
- max = READ_CHAR_MAX;
- min = 0;
- }
- switch(*cc)
- {
- case PT_ANY:
- /* PT_ANY either accepts every character (XCL_PROP) or is ignored
-    (XCL_NOTPROP). */
- if (cc[-1] == XCL_PROP)
- {
- compile_char1_matchingpath(common, OP_ALLANY, cc, backtracks, FALSE);
- if (list == backtracks)
- add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));
- return;
- }
- break;
- case PT_LAMP:
- case PT_GC:
- case PT_PC:
- case PT_ALNUM:
- needstype = TRUE;
- break;
- case PT_SC:
- needsscript = TRUE;
- break;
- case PT_SPACE:
- case PT_PXSPACE:
- case PT_WORD:
- case PT_PXGRAPH:
- case PT_PXPRINT:
- case PT_PXPUNCT:
- needstype = TRUE;
- needschar = TRUE;
- break;
- case PT_CLIST:
- case PT_UCNC:
- needschar = TRUE;
- break;
- default:
- SLJIT_UNREACHABLE();
- break;
- }
- cc += 2;
- }
- #endif
- }
- SLJIT_ASSERT(compares > 0);
- /* UTF mode is not necessarily active here, not even in 8 bit mode. */
- cc = ccbegin;
- read_char_range(common, min, max, (cc[-1] & XCL_NOT) != 0);
- if ((cc[-1] & XCL_HASPROP) == 0)
- {
- if ((cc[-1] & XCL_MAP) != 0)
- {
- jump = CMP(SLJIT_GREATER, TMP1, 0, SLJIT_IMM, 255);
- if (!optimize_class(common, (const sljit_u8 *)cc, (((const sljit_u8 *)cc)[31] & 0x80) != 0, TRUE, &found))
- {
- /* Generic bitmap test: byte index is the character divided by 8, bit index is the character modulo 8. */
- OP2(SLJIT_AND, TMP2, 0, TMP1, 0, SLJIT_IMM, 0x7);
- OP2(SLJIT_LSHR, TMP1, 0, TMP1, 0, SLJIT_IMM, 3);
- OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)cc);
- OP2(SLJIT_SHL, TMP2, 0, SLJIT_IMM, 1, TMP2, 0);
- OP2(SLJIT_AND | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, TMP2, 0);
- add_jump(compiler, &found, JUMP(SLJIT_NOT_ZERO));
- }
- add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));
- JUMPHERE(jump);
- cc += 32 / sizeof(PCRE2_UCHAR);
- }
- else
- {
- OP2(SLJIT_SUB, TMP2, 0, TMP1, 0, SLJIT_IMM, min);
- add_jump(compiler, (cc[-1] & XCL_NOT) == 0 ? backtracks : &found, CMP(SLJIT_GREATER, TMP2, 0, SLJIT_IMM, max - min));
- }
- }
- else if ((cc[-1] & XCL_MAP) != 0)
- {
- /* The bitmap test clobbers TMP1, so the character is kept in RETURN_ADDR. */
- OP1(SLJIT_MOV, RETURN_ADDR, 0, TMP1, 0);
- #ifdef SUPPORT_UNICODE
- charsaved = TRUE;
- #endif
- if (!optimize_class(common, (const sljit_u8 *)cc, FALSE, TRUE, list))
- {
- #if PCRE2_CODE_UNIT_WIDTH == 8
- jump = NULL;
- if (common->utf)
- #endif
- jump = CMP(SLJIT_GREATER, TMP1, 0, SLJIT_IMM, 255);
- OP2(SLJIT_AND, TMP2, 0, TMP1, 0, SLJIT_IMM, 0x7);
- OP2(SLJIT_LSHR, TMP1, 0, TMP1, 0, SLJIT_IMM, 3);
- OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)cc);
- OP2(SLJIT_SHL, TMP2, 0, SLJIT_IMM, 1, TMP2, 0);
- OP2(SLJIT_AND | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, TMP2, 0);
- add_jump(compiler, list, JUMP(SLJIT_NOT_ZERO));
- #if PCRE2_CODE_UNIT_WIDTH == 8
- if (common->utf)
- #endif
- JUMPHERE(jump);
- }
- OP1(SLJIT_MOV, TMP1, 0, RETURN_ADDR, 0);
- cc += 32 / sizeof(PCRE2_UCHAR);
- }
- #ifdef SUPPORT_UNICODE
- if (needstype || needsscript)
- {
- if (needschar && !charsaved)
- OP1(SLJIT_MOV, RETURN_ADDR, 0, TMP1, 0);
- #if PCRE2_CODE_UNIT_WIDTH == 32
- if (!common->utf)
- {
- jump = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, MAX_UTF_CODE_POINT + 1);
- OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, INVALID_UTF_CHAR);
- JUMPHERE(jump);
- }
- #endif
- /* Two stage table lookup; TMP2 receives the ucd_stage2 entry of the character. */
- OP2(SLJIT_LSHR, TMP2, 0, TMP1, 0, SLJIT_IMM, UCD_BLOCK_SHIFT);
- OP1(SLJIT_MOV_U8, TMP2, 0, SLJIT_MEM1(TMP2), (sljit_sw)PRIV(ucd_stage1));
- OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, UCD_BLOCK_MASK);
- OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, UCD_BLOCK_SHIFT);
- OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, TMP2, 0);
- OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, (sljit_sw)PRIV(ucd_stage2));
- OP1(SLJIT_MOV_U16, TMP2, 0, SLJIT_MEM2(TMP2, TMP1), 1);
- /* Before anything else, we deal with scripts. */
- if (needsscript)
- {
- OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, (sljit_sw)PRIV(ucd_records) + SLJIT_OFFSETOF(ucd_record, script));
- OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM2(TMP1, TMP2), 3);
- ccbegin = cc;
- while (*cc != XCL_END)
- {
- if (*cc == XCL_SINGLE)
- {
- cc ++;
- GETCHARINCTEST(c, cc);
- }
- else if (*cc == XCL_RANGE)
- {
- cc ++;
- GETCHARINCTEST(c, cc);
- GETCHARINCTEST(c, cc);
- }
- else
- {
- SLJIT_ASSERT(*cc == XCL_PROP || *cc == XCL_NOTPROP);
- cc++;
- if (*cc == PT_SC)
- {
- compares--;
- invertcmp = (compares == 0 && list != backtracks);
- if (cc[-1] == XCL_NOTPROP)
- invertcmp ^= 0x1;
- jump = CMP(SLJIT_EQUAL ^ invertcmp, TMP1, 0, SLJIT_IMM, (int)cc[1]);
- add_jump(compiler, compares > 0 ? list : backtracks, jump);
- }
- cc += 2;
- }
- }
- cc = ccbegin;
- }
- if (needschar)
- {
- OP1(SLJIT_MOV, TMP1, 0, RETURN_ADDR, 0);
- }
- if (needstype)
- {
- if (!needschar)
- {
- OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, (sljit_sw)PRIV(ucd_records) + SLJIT_OFFSETOF(ucd_record, chartype));
- OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM2(TMP1, TMP2), 3);
- }
- else
- {
- /* TMP1 keeps the character, so the type is loaded into RETURN_ADDR instead. */
- OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 3);
- OP1(SLJIT_MOV_U8, RETURN_ADDR, 0, SLJIT_MEM1(TMP2), (sljit_sw)PRIV(ucd_records) + SLJIT_OFFSETOF(ucd_record, chartype));
- typereg = RETURN_ADDR;
- }
- }
- }
- #endif
- /* Generating code. */
- charoffset = 0;
- numberofcmps = 0;
- #ifdef SUPPORT_UNICODE
- typeoffset = 0;
- #endif
- while (*cc != XCL_END)
- {
- compares--;
- /* The last compare of a non-negated class is inverted: its failure goes to backtracks. */
- invertcmp = (compares == 0 && list != backtracks);
- jump = NULL;
- if (*cc == XCL_SINGLE)
- {
- cc ++;
- GETCHARINCTEST(c, cc);
- if (numberofcmps < 3 && (*cc == XCL_SINGLE || *cc == XCL_RANGE))
- {
- OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(c - charoffset));
- OP_FLAGS(numberofcmps == 0 ? SLJIT_MOV : SLJIT_OR, TMP2, 0, SLJIT_EQUAL);
- numberofcmps++;
- }
- else if (numberofcmps > 0)
- {
- OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(c - charoffset));
- OP_FLAGS(SLJIT_OR | SLJIT_SET_Z, TMP2, 0, SLJIT_EQUAL);
- jump = JUMP(SLJIT_NOT_ZERO ^ invertcmp);
- numberofcmps = 0;
- }
- else
- {
- jump = CMP(SLJIT_EQUAL ^ invertcmp, TMP1, 0, SLJIT_IMM, (sljit_sw)(c - charoffset));
- numberofcmps = 0;
- }
- }
- else if (*cc == XCL_RANGE)
- {
- cc ++;
- GETCHARINCTEST(c, cc);
- SET_CHAR_OFFSET(c);
- GETCHARINCTEST(c, cc);
- if (numberofcmps < 3 && (*cc == XCL_SINGLE || *cc == XCL_RANGE))
- {
- OP2(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(c - charoffset));
- OP_FLAGS(numberofcmps == 0 ? SLJIT_MOV : SLJIT_OR, TMP2, 0, SLJIT_LESS_EQUAL);
- numberofcmps++;
- }
- else if (numberofcmps > 0)
- {
- OP2(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(c - charoffset));
- OP_FLAGS(SLJIT_OR | SLJIT_SET_Z, TMP2, 0, SLJIT_LESS_EQUAL);
- jump = JUMP(SLJIT_NOT_ZERO ^ invertcmp);
- numberofcmps = 0;
- }
- else
- {
- jump = CMP(SLJIT_LESS_EQUAL ^ invertcmp, TMP1, 0, SLJIT_IMM, (sljit_sw)(c - charoffset));
- numberofcmps = 0;
- }
- }
- #ifdef SUPPORT_UNICODE
- else
- {
- SLJIT_ASSERT(*cc == XCL_PROP || *cc == XCL_NOTPROP);
- if (*cc == XCL_NOTPROP)
- invertcmp ^= 0x1;
- cc++;
- switch(*cc)
- {
- case PT_ANY:
- if (!invertcmp)
- jump = JUMP(SLJIT_JUMP);
- break;
- case PT_LAMP:
- OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_Lu - typeoffset);
- OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_EQUAL);
- OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_Ll - typeoffset);
- OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_EQUAL);
- OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_Lt - typeoffset);
- OP_FLAGS(SLJIT_OR | SLJIT_SET_Z, TMP2, 0, SLJIT_EQUAL);
- jump = JUMP(SLJIT_NOT_ZERO ^ invertcmp);
- break;
- case PT_GC:
- c = PRIV(ucp_typerange)[(int)cc[1] * 2];
- SET_TYPE_OFFSET(c);
- jump = CMP(SLJIT_LESS_EQUAL ^ invertcmp, typereg, 0, SLJIT_IMM, PRIV(ucp_typerange)[(int)cc[1] * 2 + 1] - c);
- break;
- case PT_PC:
- jump = CMP(SLJIT_EQUAL ^ invertcmp, typereg, 0, SLJIT_IMM, (int)cc[1] - typeoffset);
- break;
- case PT_SC:
- /* Scripts were compared earlier; undo the decrement done at the top of the loop. */
- compares++;
- /* Do nothing. */
- break;
- case PT_SPACE:
- case PT_PXSPACE:
- SET_CHAR_OFFSET(9);
- OP2(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0xd - 0x9);
- OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_LESS_EQUAL);
- OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x85 - 0x9);
- OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_EQUAL);
- OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x180e - 0x9);
- OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_EQUAL);
- SET_TYPE_OFFSET(ucp_Zl);
- OP2(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_Zs - ucp_Zl);
- OP_FLAGS(SLJIT_OR | SLJIT_SET_Z, TMP2, 0, SLJIT_LESS_EQUAL);
- jump = JUMP(SLJIT_NOT_ZERO ^ invertcmp);
- break;
- case PT_WORD:
- OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(CHAR_UNDERSCORE - charoffset));
- OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_EQUAL);
- /* Fall through. */
- case PT_ALNUM:
- SET_TYPE_OFFSET(ucp_Ll);
- OP2(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_Lu - ucp_Ll);
- OP_FLAGS((*cc == PT_ALNUM) ? SLJIT_MOV : SLJIT_OR, TMP2, 0, SLJIT_LESS_EQUAL);
- SET_TYPE_OFFSET(ucp_Nd);
- OP2(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_No - ucp_Nd);
- OP_FLAGS(SLJIT_OR | SLJIT_SET_Z, TMP2, 0, SLJIT_LESS_EQUAL);
- jump = JUMP(SLJIT_NOT_ZERO ^ invertcmp);
- break;
- case PT_CLIST:
- other_cases = PRIV(ucd_caseless_sets) + cc[1];
- /* At least three characters are required.
- Otherwise this case would be handled by the normal code path. */
- SLJIT_ASSERT(other_cases[0] != NOTACHAR && other_cases[1] != NOTACHAR && other_cases[2] != NOTACHAR);
- SLJIT_ASSERT(other_cases[0] < other_cases[1] && other_cases[1] < other_cases[2]);
- /* Optimizing character pairs, if their difference is power of 2. */
- if (is_powerof2(other_cases[1] ^ other_cases[0]))
- {
- if (charoffset == 0)
- OP2(SLJIT_OR, TMP2, 0, TMP1, 0, SLJIT_IMM, other_cases[1] ^ other_cases[0]);
- else
- {
- OP2(SLJIT_ADD, TMP2, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)charoffset);
- OP2(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_IMM, other_cases[1] ^ other_cases[0]);
- }
- OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP2, 0, SLJIT_IMM, other_cases[1]);
- OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_EQUAL);
- other_cases += 2;
- }
- else if (is_powerof2(other_cases[2] ^ other_cases[1]))
- {
- if (charoffset == 0)
- OP2(SLJIT_OR, TMP2, 0, TMP1, 0, SLJIT_IMM, other_cases[2] ^ other_cases[1]);
- else
- {
- OP2(SLJIT_ADD, TMP2, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)charoffset);
- /* The pair folded here is other_cases[1] / other_cases[2], so the mask
-    must be their difference, exactly as in the charoffset == 0 path. */
- OP2(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_IMM, other_cases[2] ^ other_cases[1]);
- }
- OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP2, 0, SLJIT_IMM, other_cases[2]);
- OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_EQUAL);
- OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(other_cases[0] - charoffset));
- OP_FLAGS(SLJIT_OR | ((other_cases[3] == NOTACHAR) ? SLJIT_SET_Z : 0), TMP2, 0, SLJIT_EQUAL);
- other_cases += 3;
- }
- else
- {
- OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(*other_cases++ - charoffset));
- OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_EQUAL);
- }
- while (*other_cases != NOTACHAR)
- {
- OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(*other_cases++ - charoffset));
- OP_FLAGS(SLJIT_OR | ((*other_cases == NOTACHAR) ? SLJIT_SET_Z : 0), TMP2, 0, SLJIT_EQUAL);
- }
- jump = JUMP(SLJIT_NOT_ZERO ^ invertcmp);
- break;
- case PT_UCNC:
- OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(CHAR_DOLLAR_SIGN - charoffset));
- OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_EQUAL);
- OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(CHAR_COMMERCIAL_AT - charoffset));
- OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_EQUAL);
- OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(CHAR_GRAVE_ACCENT - charoffset));
- OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_EQUAL);
- SET_CHAR_OFFSET(0xa0);
- OP2(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(0xd7ff - charoffset));
- OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_LESS_EQUAL);
- SET_CHAR_OFFSET(0);
- OP2(SLJIT_SUB | SLJIT_SET_GREATER_EQUAL, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0xe000 - 0);
- OP_FLAGS(SLJIT_OR | SLJIT_SET_Z, TMP2, 0, SLJIT_GREATER_EQUAL);
- jump = JUMP(SLJIT_NOT_ZERO ^ invertcmp);
- break;
- case PT_PXGRAPH:
- /* C and Z groups are the farthest two groups. */
- SET_TYPE_OFFSET(ucp_Ll);
- OP2(SLJIT_SUB | SLJIT_SET_GREATER, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_So - ucp_Ll);
- OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_GREATER);
- jump = CMP(SLJIT_NOT_EQUAL, typereg, 0, SLJIT_IMM, ucp_Cf - ucp_Ll);
- /* In case of ucp_Cf, we overwrite the result. */
- SET_CHAR_OFFSET(0x2066);
- OP2(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x2069 - 0x2066);
- OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_LESS_EQUAL);
- OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x061c - 0x2066);
- OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_EQUAL);
- OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x180e - 0x2066);
- OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_EQUAL);
- JUMPHERE(jump);
- jump = CMP(SLJIT_ZERO ^ invertcmp, TMP2, 0, SLJIT_IMM, 0);
- break;
- case PT_PXPRINT:
- /* C and Z groups are the farthest two groups. */
- SET_TYPE_OFFSET(ucp_Ll);
- OP2(SLJIT_SUB | SLJIT_SET_GREATER, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_So - ucp_Ll);
- OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_GREATER);
- OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_Zs - ucp_Ll);
- OP_FLAGS(SLJIT_AND, TMP2, 0, SLJIT_NOT_EQUAL);
- jump = CMP(SLJIT_NOT_EQUAL, typereg, 0, SLJIT_IMM, ucp_Cf - ucp_Ll);
- /* In case of ucp_Cf, we overwrite the result. */
- SET_CHAR_OFFSET(0x2066);
- OP2(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x2069 - 0x2066);
- OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_LESS_EQUAL);
- OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x061c - 0x2066);
- OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_EQUAL);
- JUMPHERE(jump);
- jump = CMP(SLJIT_ZERO ^ invertcmp, TMP2, 0, SLJIT_IMM, 0);
- break;
- case PT_PXPUNCT:
- SET_TYPE_OFFSET(ucp_Sc);
- OP2(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_So - ucp_Sc);
- OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_LESS_EQUAL);
- SET_CHAR_OFFSET(0);
- OP2(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x7f);
- OP_FLAGS(SLJIT_AND, TMP2, 0, SLJIT_LESS_EQUAL);
- SET_TYPE_OFFSET(ucp_Pc);
- OP2(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_Ps - ucp_Pc);
- OP_FLAGS(SLJIT_OR | SLJIT_SET_Z, TMP2, 0, SLJIT_LESS_EQUAL);
- jump = JUMP(SLJIT_NOT_ZERO ^ invertcmp);
- break;
- default:
- SLJIT_UNREACHABLE();
- break;
- }
- cc += 2;
- }
- #endif
- if (jump != NULL)
- add_jump(compiler, compares > 0 ? list : backtracks, jump);
- }
- if (found != NULL)
- set_jumps(found, LABEL());
- }
- #undef SET_TYPE_OFFSET
- #undef SET_CHAR_OFFSET
- #endif
- static PCRE2_SPTR compile_simple_assertion_matchingpath(compiler_common *common, PCRE2_UCHAR type, PCRE2_SPTR cc, jump_list **backtracks)
- { /* Emits the matching-path code for the simple assertion opcode `type`
-    (OP_SOD, OP_SOM, the word boundary pair, OP_EODN, OP_EOD, OP_DOLL,
-    OP_DOLLM, OP_CIRC, OP_CIRCM, OP_REVERSE). A failing assertion jumps to
-    the backtracks list. Returns cc unchanged, except for OP_REVERSE which
-    returns cc + LINK_SIZE. Any other opcode reaches SLJIT_UNREACHABLE(). */
- DEFINE_COMPILER;
- int length;
- struct sljit_jump *jump[4];
- #ifdef SUPPORT_UNICODE
- struct sljit_label *label;
- #endif /* SUPPORT_UNICODE */
- switch(type)
- {
- case OP_SOD: /* passes only when STR_PTR equals the `begin` field of the arguments */
- OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
- OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, begin));
- add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, STR_PTR, 0, TMP1, 0));
- return cc;
- case OP_SOM: /* passes only when STR_PTR equals the `str` field of the arguments */
- OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
- OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, str));
- add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, STR_PTR, 0, TMP1, 0));
- return cc;
- case OP_NOT_WORD_BOUNDARY:
- case OP_WORD_BOUNDARY: /* calls the shared wordboundary helper and tests the zero flag it leaves */
- add_jump(compiler, &common->wordboundary, JUMP(SLJIT_FAST_CALL));
- sljit_set_current_flags(compiler, SLJIT_SET_Z);
- add_jump(compiler, backtracks, JUMP(type == OP_NOT_WORD_BOUNDARY ? SLJIT_NOT_ZERO : SLJIT_ZERO));
- return cc;
- case OP_EODN:
- /* Requires rather complex checks. jump[0] is the "already at STR_END"
-    shortcut; otherwise the rest of the subject must be exactly one
-    newline. */
- jump[0] = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
- if (common->nltype == NLTYPE_FIXED && common->newline > 255)
- { /* fixed two-unit newline: high byte of common->newline first, low byte second */
- OP2(SLJIT_ADD, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));
- OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
- if (common->mode == PCRE2_JIT_COMPLETE)
- add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP2, 0, STR_END, 0));
- else
- { /* partial modes: only the first newline unit may be present before STR_END */
- jump[1] = CMP(SLJIT_EQUAL, TMP2, 0, STR_END, 0);
- OP2(SLJIT_SUB | SLJIT_SET_LESS, SLJIT_UNUSED, 0, TMP2, 0, STR_END, 0);
- OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_LESS);
- OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff);
- OP_FLAGS(SLJIT_OR | SLJIT_SET_Z, TMP2, 0, SLJIT_NOT_EQUAL);
- add_jump(compiler, backtracks, JUMP(SLJIT_NOT_EQUAL));
- check_partial(common, TRUE);
- add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));
- JUMPHERE(jump[1]);
- }
- OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
- add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff));
- add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP2, 0, SLJIT_IMM, common->newline & 0xff));
- }
- else if (common->nltype == NLTYPE_FIXED)
- { /* fixed one-unit newline: it must be the last unit of the subject */
- OP2(SLJIT_ADD, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
- OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
- add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP2, 0, STR_END, 0));
- add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, common->newline));
- }
- else
- { /* other newline types: a leading CHAR_CR gets the two-unit check first */
- OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
- jump[1] = CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_CR);
- OP2(SLJIT_ADD, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));
- OP2(SLJIT_SUB | SLJIT_SET_Z | SLJIT_SET_GREATER, SLJIT_UNUSED, 0, TMP2, 0, STR_END, 0);
- jump[2] = JUMP(SLJIT_GREATER);
- add_jump(compiler, backtracks, JUMP(SLJIT_NOT_EQUAL) /* LESS */);
- /* Equal. */
- OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
- jump[3] = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_NL);
- add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));
- JUMPHERE(jump[1]);
- if (common->nltype == NLTYPE_ANYCRLF)
- {
- OP2(SLJIT_ADD, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
- add_jump(compiler, backtracks, CMP(SLJIT_LESS, TMP2, 0, STR_END, 0));
- add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_NL));
- }
- else
- { /* STR_PTR is saved in LOCALS1 because read_char_range is used to read the character; it is restored below */
- OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), LOCALS1, STR_PTR, 0);
- read_char_range(common, common->nlmin, common->nlmax, TRUE);
- add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, STR_PTR, 0, STR_END, 0));
- add_jump(compiler, &common->anynewline, JUMP(SLJIT_FAST_CALL));
- sljit_set_current_flags(compiler, SLJIT_SET_Z);
- add_jump(compiler, backtracks, JUMP(SLJIT_ZERO));
- OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP), LOCALS1);
- }
- JUMPHERE(jump[2]);
- JUMPHERE(jump[3]);
- }
- JUMPHERE(jump[0]);
- check_partial(common, FALSE);
- return cc;
- case OP_EOD: /* passes only when STR_PTR is not below STR_END */
- add_jump(compiler, backtracks, CMP(SLJIT_LESS, STR_PTR, 0, STR_END, 0));
- check_partial(common, FALSE);
- return cc;
- case OP_DOLL: /* fails when PCRE2_NOTEOL is set; otherwise OP_EODN, or a plain end test when common->endonly */
- OP1(SLJIT_MOV, TMP2, 0, ARGUMENTS, 0);
- OP2(SLJIT_AND32 | SLJIT_SET_Z, SLJIT_UNUSED, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(jit_arguments, options), SLJIT_IMM, PCRE2_NOTEOL);
- add_jump(compiler, backtracks, JUMP(SLJIT_NOT_ZERO32));
- if (!common->endonly)
- compile_simple_assertion_matchingpath(common, OP_EODN, cc, backtracks);
- else
- {
- add_jump(compiler, backtracks, CMP(SLJIT_LESS, STR_PTR, 0, STR_END, 0));
- check_partial(common, FALSE);
- }
- return cc;
- case OP_DOLLM: /* at STR_END: PCRE2_NOTEOL decides; before STR_END: a newline must follow */
- jump[1] = CMP(SLJIT_LESS, STR_PTR, 0, STR_END, 0);
- OP1(SLJIT_MOV, TMP2, 0, ARGUMENTS, 0);
- OP2(SLJIT_AND32 | SLJIT_SET_Z, SLJIT_UNUSED, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(jit_arguments, options), SLJIT_IMM, PCRE2_NOTEOL);
- add_jump(compiler, backtracks, JUMP(SLJIT_NOT_ZERO32));
- check_partial(common, FALSE);
- jump[0] = JUMP(SLJIT_JUMP);
- JUMPHERE(jump[1]);
- if (common->nltype == NLTYPE_FIXED && common->newline > 255)
- {
- OP2(SLJIT_ADD, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));
- OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
- if (common->mode == PCRE2_JIT_COMPLETE)
- add_jump(compiler, backtracks, CMP(SLJIT_GREATER, TMP2, 0, STR_END, 0));
- else
- {
- jump[1] = CMP(SLJIT_LESS_EQUAL, TMP2, 0, STR_END, 0);
- /* STR_PTR = STR_END - IN_UCHARS(1) */
- add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff));
- check_partial(common, TRUE);
- add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));
- JUMPHERE(jump[1]);
- }
- OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
- add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff));
- add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP2, 0, SLJIT_IMM, common->newline & 0xff));
- }
- else
- {
- peek_char(common, common->nlmax);
- check_newlinechar(common, common->nltype, backtracks, FALSE);
- }
- JUMPHERE(jump[0]);
- return cc;
- case OP_CIRC: /* fails when STR_PTR is past `begin` or PCRE2_NOTBOL is set */
- OP1(SLJIT_MOV, TMP2, 0, ARGUMENTS, 0);
- OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(jit_arguments, begin));
- add_jump(compiler, backtracks, CMP(SLJIT_GREATER, STR_PTR, 0, TMP1, 0));
- OP2(SLJIT_AND32 | SLJIT_SET_Z, SLJIT_UNUSED, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(jit_arguments, options), SLJIT_IMM, PCRE2_NOTBOL);
- add_jump(compiler, backtracks, JUMP(SLJIT_NOT_ZERO32));
- return cc;
- case OP_CIRCM: /* at `begin`: PCRE2_NOTBOL decides; past `begin`: a newline must precede STR_PTR */
- OP1(SLJIT_MOV, TMP2, 0, ARGUMENTS, 0);
- OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(jit_arguments, begin));
- jump[1] = CMP(SLJIT_GREATER, STR_PTR, 0, TMP1, 0);
- OP2(SLJIT_AND32 | SLJIT_SET_Z, SLJIT_UNUSED, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(jit_arguments, options), SLJIT_IMM, PCRE2_NOTBOL);
- add_jump(compiler, backtracks, JUMP(SLJIT_NOT_ZERO32));
- jump[0] = JUMP(SLJIT_JUMP);
- JUMPHERE(jump[1]);
- if (!common->alt_circumflex)
- add_jump(compiler, backtracks, CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0));
- if (common->nltype == NLTYPE_FIXED && common->newline > 255)
- {
- OP2(SLJIT_SUB, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));
- add_jump(compiler, backtracks, CMP(SLJIT_LESS, TMP2, 0, TMP1, 0));
- OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-2));
- OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-1));
- add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff));
- add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP2, 0, SLJIT_IMM, common->newline & 0xff));
- }
- else
- {
- skip_char_back(common);
- read_char_range(common, common->nlmin, common->nlmax, TRUE);
- check_newlinechar(common, common->nltype, backtracks, FALSE);
- }
- JUMPHERE(jump[0]);
- return cc;
- case OP_REVERSE: /* moves STR_PTR back by the count stored at cc; fails when that would pass `begin` */
- length = GET(cc, 0);
- if (length == 0)
- return cc + LINK_SIZE;
- OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
- #ifdef SUPPORT_UNICODE
- if (common->utf)
- { /* utf: step back one character at a time, `length` times */
- OP1(SLJIT_MOV, TMP3, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, begin));
- OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, length);
- label = LABEL();
- add_jump(compiler, backtracks, CMP(SLJIT_LESS_EQUAL, STR_PTR, 0, TMP3, 0));
- skip_char_back(common);
- OP2(SLJIT_SUB | SLJIT_SET_Z, TMP2, 0, TMP2, 0, SLJIT_IMM, 1);
- JUMPTO(SLJIT_NOT_ZERO, label);
- }
- else
- #endif
- { /* no utf: a single subtraction of IN_UCHARS(length) */
- OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, begin));
- OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(length));
- add_jump(compiler, backtracks, CMP(SLJIT_LESS, STR_PTR, 0, TMP1, 0));
- }
- check_start_used_ptr(common);
- return cc + LINK_SIZE;
- }
- SLJIT_UNREACHABLE();
- return cc;
- }
- #ifdef SUPPORT_UNICODE
- #if PCRE2_CODE_UNIT_WIDTH != 32
- /* Helper called from JIT-generated code for OP_EXTUNI in UTF mode (only built
-    when the code unit width is not 32). cc points at the first code unit of a
-    character inside the subject described by args. Returns a pointer just past
-    the last character that the grapheme-break table allows to be joined to it. */
- static PCRE2_SPTR SLJIT_FUNC do_extuni_utf(jit_arguments *args, PCRE2_SPTR cc)
- {
-   PCRE2_SPTR subject_first = args->begin;
-   PCRE2_SPTR subject_limit = args->end;
-   PCRE2_SPTR left_char = cc;
-   PCRE2_SPTR scan;
-   uint32_t ch;
-   int left_prop, right_prop, right_len, ri_before;
-   GETCHARINC(ch, cc);
-   left_prop = UCD_GRAPHBREAK(ch);
-   /* Each pass looks at the character at cc (the right-hand one) and, when no
-      break is required, steps over it. left_char tracks where the left-hand
-      character starts. */
-   for (; cc < subject_limit; left_char = cc, cc += right_len)
-     {
-     right_len = 1;
-     GETCHARLEN(ch, cc, right_len);
-     right_prop = UCD_GRAPHBREAK(ch);
-     if ((PRIV(ucp_gbtable)[left_prop] & (1 << right_prop)) == 0)
-       break;
-     /* Two Regional Indicators may only stay together when an even number of
-        Regional Indicators precedes the left-hand one. */
-     if (left_prop == ucp_gbRegionalIndicator && right_prop == ucp_gbRegionalIndicator)
-       {
-       ri_before = 0;
-       for (scan = left_char; scan > subject_first; ri_before++)
-         {
-         scan--;
-         BACKCHAR(scan);
-         GETCHAR(ch, scan);
-         if (UCD_GRAPHBREAK(ch) != ucp_gbRegionalIndicator)
-           break;
-         }
-       if ((ri_before & 1) != 0)
-         break; /* Grapheme break required */
-       }
-     /* Keep E_Base[_GAZ] as the left-hand property across Extend characters so
-        that any number of them may come before a following E_Modifier. */
-     if (!(right_prop == ucp_gbExtend && (left_prop == ucp_gbE_Base || left_prop == ucp_gbE_Base_GAZ)))
-       left_prop = right_prop;
-     }
-   return cc;
- }
- #endif
- /* Helper called from JIT-generated code for OP_EXTUNI when every code unit is
-    treated as one character. cc points at a character inside the subject
-    described by args. Returns a pointer just past the last character that the
-    grapheme-break table allows to be joined to it. */
- static PCRE2_SPTR SLJIT_FUNC do_extuni_no_utf(jit_arguments *args, PCRE2_SPTR cc)
- {
-   PCRE2_SPTR subject_first = args->begin;
-   PCRE2_SPTR subject_limit = args->end;
-   PCRE2_SPTR scan;
-   uint32_t ch;
-   int left_prop, right_prop, ri_before;
-   GETCHARINC(ch, cc);
-   left_prop = UCD_GRAPHBREAK(ch);
-   /* cc is the right-hand character; cc - 1 is the left-hand one. */
-   for (; cc < subject_limit; cc++)
-     {
-     ch = *cc;
-     right_prop = UCD_GRAPHBREAK(ch);
-     if ((PRIV(ucp_gbtable)[left_prop] & (1 << right_prop)) == 0)
-       break;
-     /* Two Regional Indicators may only stay together when an even number of
-        Regional Indicators precedes the left-hand one. */
-     if (left_prop == ucp_gbRegionalIndicator && right_prop == ucp_gbRegionalIndicator)
-       {
-       ri_before = 0;
-       for (scan = cc - 1; scan > subject_first; ri_before++)
-         {
-         ch = *--scan;
-         if (UCD_GRAPHBREAK(ch) != ucp_gbRegionalIndicator)
-           break;
-         }
-       if ((ri_before & 1) != 0)
-         break; /* Grapheme break required */
-       }
-     /* Keep E_Base[_GAZ] as the left-hand property across Extend characters so
-        that any number of them may come before a following E_Modifier. */
-     if (!(right_prop == ucp_gbExtend && (left_prop == ucp_gbE_Base || left_prop == ucp_gbE_Base_GAZ)))
-       left_prop = right_prop;
-     }
-   return cc;
- }
- #endif
- static PCRE2_SPTR compile_char1_matchingpath(compiler_common *common, PCRE2_UCHAR type, PCRE2_SPTR cc, jump_list **backtracks, BOOL check_str_ptr)
- { /* Emits the matching path for one single-character opcode. 'type' selects
-   the case below and cc points at the opcode's operand, if it has one. Jumps
-   taken on a failed match are collected in 'backtracks'. When check_str_ptr
-   is set, detect_partial_match() is emitted before the character is read
-   (the OP_CHAR / OP_CHARI fast path emits its own STR_END comparison
-   instead). Returns the pattern position that follows whatever operand the
-   case consumed. */
- DEFINE_COMPILER;
- int length;
- unsigned int c, oc, bit;
- compare_context context;
- struct sljit_jump *jump[3];
- jump_list *end_list;
- #ifdef SUPPORT_UNICODE
- PCRE2_UCHAR propdata[5];
- #endif /* SUPPORT_UNICODE */
- switch(type)
- {
- case OP_NOT_DIGIT:
- case OP_DIGIT:
- /* Digits are usually 0-9, so it is worth to optimize them. */
- if (check_str_ptr)
- detect_partial_match(common, backtracks);
- #if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 8
- if (common->utf && is_char7_bitset((const sljit_u8*)common->ctypes - cbit_length + cbit_digit, FALSE))
- read_char7_type(common, type == OP_NOT_DIGIT);
- else
- #endif
- read_char8_type(common, type == OP_NOT_DIGIT);
- /* Flip the starting bit in the negative case. */
- OP2(SLJIT_AND | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ctype_digit);
- add_jump(compiler, backtracks, JUMP(type == OP_DIGIT ? SLJIT_ZERO : SLJIT_NOT_ZERO));
- return cc;
- case OP_NOT_WHITESPACE:
- case OP_WHITESPACE:
- if (check_str_ptr)
- detect_partial_match(common, backtracks);
- #if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 8
- if (common->utf && is_char7_bitset((const sljit_u8*)common->ctypes - cbit_length + cbit_space, FALSE))
- read_char7_type(common, type == OP_NOT_WHITESPACE);
- else
- #endif
- read_char8_type(common, type == OP_NOT_WHITESPACE);
- OP2(SLJIT_AND | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ctype_space);
- add_jump(compiler, backtracks, JUMP(type == OP_WHITESPACE ? SLJIT_ZERO : SLJIT_NOT_ZERO));
- return cc;
- case OP_NOT_WORDCHAR:
- case OP_WORDCHAR:
- if (check_str_ptr)
- detect_partial_match(common, backtracks);
- #if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 8
- if (common->utf && is_char7_bitset((const sljit_u8*)common->ctypes - cbit_length + cbit_word, FALSE))
- read_char7_type(common, type == OP_NOT_WORDCHAR);
- else
- #endif
- read_char8_type(common, type == OP_NOT_WORDCHAR);
- OP2(SLJIT_AND | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ctype_word);
- add_jump(compiler, backtracks, JUMP(type == OP_WORDCHAR ? SLJIT_ZERO : SLJIT_NOT_ZERO));
- return cc;
- case OP_ANY:
- if (check_str_ptr)
- detect_partial_match(common, backtracks);
- read_char_range(common, common->nlmin, common->nlmax, TRUE);
- if (common->nltype == NLTYPE_FIXED && common->newline > 255)
- { /* Two-unit fixed newline: fail only when the character just read and the
-   following one together form common->newline. Reaching STR_END after the
-   first unit counts as a successful match of that unit. */
- jump[0] = CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff);
- end_list = NULL;
- if (common->mode != PCRE2_JIT_PARTIAL_HARD)
- add_jump(compiler, &end_list, CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0));
- else
- check_str_end(common, &end_list);
- OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
- add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, common->newline & 0xff));
- set_jumps(end_list, LABEL());
- JUMPHERE(jump[0]);
- }
- else
- check_newlinechar(common, common->nltype, backtracks, TRUE);
- return cc;
- case OP_ALLANY:
- if (check_str_ptr)
- detect_partial_match(common, backtracks);
- #if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
- if (common->utf)
- { /* UTF mode: step over the first code unit, then add the number of extra
-   code units (table lookup for 8-bit, surrogate test for 16-bit). */
- OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
- OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
- #if PCRE2_CODE_UNIT_WIDTH == 8 || PCRE2_CODE_UNIT_WIDTH == 16
- #if PCRE2_CODE_UNIT_WIDTH == 8
- jump[0] = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0xc0);
- OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)PRIV(utf8_table4) - 0xc0);
- OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
- #elif PCRE2_CODE_UNIT_WIDTH == 16
- jump[0] = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0xd800);
- OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xfc00);
- OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0xd800);
- OP_FLAGS(SLJIT_MOV, TMP1, 0, SLJIT_EQUAL);
- OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
- OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
- #endif
- JUMPHERE(jump[0]);
- #endif /* PCRE2_CODE_UNIT_WIDTH == [8|16] */
- return cc;
- }
- #endif
- OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
- return cc;
- case OP_ANYBYTE:
- if (check_str_ptr)
- detect_partial_match(common, backtracks);
- OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
- return cc;
- #ifdef SUPPORT_UNICODE
- case OP_NOTPROP:
- case OP_PROP:
- propdata[0] = XCL_HASPROP; /* Wrap the two operand units in a small local
-   XCL_* item list so the xclass compiler can be reused for the property. */
- propdata[1] = type == OP_NOTPROP ? XCL_NOTPROP : XCL_PROP;
- propdata[2] = cc[0];
- propdata[3] = cc[1];
- propdata[4] = XCL_END;
- if (check_str_ptr)
- detect_partial_match(common, backtracks);
- compile_xclass_matchingpath(common, propdata, backtracks);
- return cc + 2;
- #endif
- case OP_ANYNL:
- if (check_str_ptr)
- detect_partial_match(common, backtracks);
- read_char_range(common, common->bsr_nlmin, common->bsr_nlmax, FALSE);
- jump[0] = CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_CR); /* CR is handled
-   inline: a directly following NL is consumed as well. Every other character
-   goes through check_newlinechar() at jump[0]. */
- /* We don't need to handle soft partial matching case. */
- end_list = NULL;
- if (common->mode != PCRE2_JIT_PARTIAL_HARD)
- add_jump(compiler, &end_list, CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0));
- else
- check_str_end(common, &end_list);
- OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
- jump[1] = CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_NL);
- OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
- jump[2] = JUMP(SLJIT_JUMP);
- JUMPHERE(jump[0]);
- check_newlinechar(common, common->bsr_nltype, backtracks, FALSE);
- set_jumps(end_list, LABEL());
- JUMPHERE(jump[1]);
- JUMPHERE(jump[2]);
- return cc;
- case OP_NOT_HSPACE:
- case OP_HSPACE:
- if (check_str_ptr)
- detect_partial_match(common, backtracks);
- read_char_range(common, 0x9, 0x3000, type == OP_NOT_HSPACE);
- add_jump(compiler, &common->hspace, JUMP(SLJIT_FAST_CALL)); /* Fast call to the
-   shared hspace routine; the jump below tests the zero flag that
-   sljit_set_current_flags() declares as current. */
- sljit_set_current_flags(compiler, SLJIT_SET_Z);
- add_jump(compiler, backtracks, JUMP(type == OP_NOT_HSPACE ? SLJIT_NOT_ZERO : SLJIT_ZERO));
- return cc;
- case OP_NOT_VSPACE:
- case OP_VSPACE:
- if (check_str_ptr)
- detect_partial_match(common, backtracks);
- read_char_range(common, 0xa, 0x2029, type == OP_NOT_VSPACE);
- add_jump(compiler, &common->vspace, JUMP(SLJIT_FAST_CALL)); /* Same scheme as
-   the hspace case, using the shared vspace routine. */
- sljit_set_current_flags(compiler, SLJIT_SET_Z);
- add_jump(compiler, backtracks, JUMP(type == OP_NOT_VSPACE ? SLJIT_NOT_ZERO : SLJIT_ZERO));
- return cc;
- #ifdef SUPPORT_UNICODE
- case OP_EXTUNI:
- if (check_str_ptr)
- detect_partial_match(common, backtracks);
- SLJIT_ASSERT(TMP1 == SLJIT_R0 && STR_PTR == SLJIT_R1);
- OP1(SLJIT_MOV, SLJIT_R0, 0, ARGUMENTS, 0); /* R0 = jit_arguments pointer and
-   R1 = STR_PTR are the two arguments of the do_extuni_*() helper. */
- #if PCRE2_CODE_UNIT_WIDTH != 32
- sljit_emit_icall(compiler, SLJIT_CALL, SLJIT_RET(SW) | SLJIT_ARG1(SW) | SLJIT_ARG2(SW), SLJIT_IMM,
- common->utf ? SLJIT_FUNC_OFFSET(do_extuni_utf) : SLJIT_FUNC_OFFSET(do_extuni_no_utf));
- #else
- sljit_emit_icall(compiler, SLJIT_CALL, SLJIT_RET(SW) | SLJIT_ARG1(SW) | SLJIT_ARG2(SW), SLJIT_IMM, SLJIT_FUNC_OFFSET(do_extuni_no_utf));
- #endif
- OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_RETURN_REG, 0);
- if (common->mode == PCRE2_JIT_PARTIAL_HARD)
- {
- jump[0] = CMP(SLJIT_LESS, SLJIT_RETURN_REG, 0, STR_END, 0);
- /* Since we successfully read a char above, partial matching must occur. */
- check_partial(common, TRUE);
- JUMPHERE(jump[0]);
- }
- return cc;
- #endif
- case OP_CHAR:
- case OP_CHARI:
- length = 1;
- #ifdef SUPPORT_UNICODE
- if (common->utf && HAS_EXTRALEN(*cc)) length += GET_EXTRALEN(*cc);
- #endif
- if (common->mode == PCRE2_JIT_COMPLETE && check_str_ptr
- && (type == OP_CHAR || !char_has_othercase(common, cc) || char_get_othercase_bit(common, cc) != 0))
- { /* Fast path: advance STR_PTR by the whole character first, check it
-   against STR_END once, then compare the code units with
-   byte_sequence_compare(). */
- OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(length));
- add_jump(compiler, backtracks, CMP(SLJIT_GREATER, STR_PTR, 0, STR_END, 0));
- context.length = IN_UCHARS(length);
- context.sourcereg = -1;
- #if defined SLJIT_UNALIGNED && SLJIT_UNALIGNED
- context.ucharptr = 0;
- #endif
- return byte_sequence_compare(common, type == OP_CHARI, cc, &context, backtracks);
- }
- if (check_str_ptr)
- detect_partial_match(common, backtracks);
- #ifdef SUPPORT_UNICODE
- if (common->utf)
- {
- GETCHAR(c, cc);
- }
- else
- #endif
- c = *cc;
- if (type == OP_CHAR || !char_has_othercase(common, cc))
- {
- read_char_range(common, c, c, FALSE);
- add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, c));
- return cc + length;
- }
- oc = char_othercase(common, c);
- read_char_range(common, c < oc ? c : oc, c > oc ? c : oc, FALSE);
- bit = c ^ oc; /* When the two cases differ in exactly one bit, OR-ing that bit
-   into the subject character lets a single compare cover both. */
- if (is_powerof2(bit))
- {
- OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, bit);
- add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, c | bit));
- return cc + length;
- }
- jump[0] = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, c);
- add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, oc));
- JUMPHERE(jump[0]);
- return cc + length;
- case OP_NOT:
- case OP_NOTI:
- if (check_str_ptr)
- detect_partial_match(common, backtracks);
- length = 1;
- #ifdef SUPPORT_UNICODE
- if (common->utf)
- {
- #if PCRE2_CODE_UNIT_WIDTH == 8
- c = *cc;
- if (c < 128)
- { /* Operand below 128 in 8-bit UTF mode: the first subject byte alone
-   decides the comparison; afterwards the rest of the subject character is
-   skipped. */
- OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
- if (type == OP_NOT || !char_has_othercase(common, cc))
- add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, c));
- else
- {
- /* Since UTF8 code page is fixed, we know that c is in [a-z] or [A-Z] range. */
- OP2(SLJIT_OR, TMP2, 0, TMP1, 0, SLJIT_IMM, 0x20);
- add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP2, 0, SLJIT_IMM, c | 0x20));
- }
- /* Skip the variable-length character. */
- OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
- jump[0] = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0xc0);
- OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)PRIV(utf8_table4) - 0xc0);
- OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
- JUMPHERE(jump[0]);
- return cc + 1;
- }
- else
- #endif /* PCRE2_CODE_UNIT_WIDTH == 8 */
- {
- GETCHARLEN(c, cc, length);
- }
- }
- else
- #endif /* SUPPORT_UNICODE */
- c = *cc;
- if (type == OP_NOT || !char_has_othercase(common, cc))
- {
- read_char_range(common, c, c, TRUE);
- add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, c));
- }
- else
- {
- oc = char_othercase(common, c);
- read_char_range(common, c < oc ? c : oc, c > oc ? c : oc, TRUE);
- bit = c ^ oc;
- if (is_powerof2(bit))
- {
- OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, bit);
- add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, c | bit));
- }
- else
- {
- add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, c));
- add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, oc));
- }
- }
- return cc + length;
- case OP_CLASS:
- case OP_NCLASS:
- if (check_str_ptr)
- detect_partial_match(common, backtracks);
- #if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 8
- bit = (common->utf && is_char7_bitset((const sljit_u8 *)cc, type == OP_NCLASS)) ? 127 : 255;
- read_char_range(common, 0, bit, type == OP_NCLASS);
- #else
- read_char_range(common, 0, 255, type == OP_NCLASS);
- #endif
- if (optimize_class(common, (const sljit_u8 *)cc, type == OP_NCLASS, FALSE, backtracks))
- return cc + 32 / sizeof(PCRE2_UCHAR);
- #if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 8
- jump[0] = NULL;
- if (common->utf)
- {
- jump[0] = CMP(SLJIT_GREATER, TMP1, 0, SLJIT_IMM, bit);
- if (type == OP_CLASS)
- {
- add_jump(compiler, backtracks, jump[0]);
- jump[0] = NULL;
- }
- }
- #elif PCRE2_CODE_UNIT_WIDTH != 8
- jump[0] = CMP(SLJIT_GREATER, TMP1, 0, SLJIT_IMM, 255);
- if (type == OP_CLASS)
- {
- add_jump(compiler, backtracks, jump[0]);
- jump[0] = NULL;
- }
- #endif /* SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 8 */
- OP2(SLJIT_AND, TMP2, 0, TMP1, 0, SLJIT_IMM, 0x7); /* Bitmap test: TMP2 = bit
-   number within the byte, TMP1 = byte index into the 32-byte bitmap stored
-   at cc. A clear bit means the character is not in the class. */
- OP2(SLJIT_LSHR, TMP1, 0, TMP1, 0, SLJIT_IMM, 3);
- OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)cc);
- OP2(SLJIT_SHL, TMP2, 0, SLJIT_IMM, 1, TMP2, 0);
- OP2(SLJIT_AND | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, TMP2, 0);
- add_jump(compiler, backtracks, JUMP(SLJIT_ZERO));
- #if defined SUPPORT_UNICODE || PCRE2_CODE_UNIT_WIDTH != 8
- if (jump[0] != NULL)
- JUMPHERE(jump[0]);
- #endif
- return cc + 32 / sizeof(PCRE2_UCHAR);
- #if defined SUPPORT_UNICODE || PCRE2_CODE_UNIT_WIDTH == 16 || PCRE2_CODE_UNIT_WIDTH == 32
- case OP_XCLASS:
- if (check_str_ptr)
- detect_partial_match(common, backtracks);
- compile_xclass_matchingpath(common, cc + LINK_SIZE, backtracks);
- return cc + GET(cc, 0) - 1;
- #endif
- }
- SLJIT_UNREACHABLE();
- return cc;
- }
- /* Matches one or more literal characters and always consumes at least one
-    input character. Consecutive OP_CHAR / OP_CHARI items whose length in code
-    units is known in advance are merged, so that one STR_END check covers the
-    whole run. Anything else is handed to compile_char1_matchingpath().
-    Returns the pattern position after the items that were compiled. */
- static SLJIT_INLINE PCRE2_SPTR compile_charn_matchingpath(compiler_common *common, PCRE2_SPTR cc, PCRE2_SPTR ccend, jump_list **backtracks)
- {
-   DEFINE_COMPILER;
-   PCRE2_SPTR first_item = cc;
-   compare_context context;
-   int units;
-   context.length = 0;
-   /* Pass 1: measure the run of fixed-length items that starts at cc. A
-      caseless character counts only when it has no other case or when its
-      two cases can be told apart by a single bit. */
-   while (cc < ccend)
-     {
-     units = 0;
-     if (*cc == OP_CHAR
-         || (*cc == OP_CHARI && (!char_has_othercase(common, cc + 1) || char_get_othercase_bit(common, cc + 1) != 0)))
-       {
-       units = 1;
- #ifdef SUPPORT_UNICODE
-       if (common->utf && HAS_EXTRALEN(cc[1]))
-         units += GET_EXTRALEN(cc[1]);
- #endif
-       }
-     cc += 1 + units;
-     context.length += IN_UCHARS(units);
-     if (!(units > 0 && context.length <= 128))
-       break;
-     }
-   cc = first_item;
-   if (!(context.length > 0))
-     {
-     /* No fixed-length run: the first item is compiled on its own. */
-     return compile_char1_matchingpath(common, *cc, cc + 1, backtracks, TRUE);
-     }
-   /* Pass 2: a single bounds check for the whole run, then compare the items
-      one after another until the measured length is used up. */
-   OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, context.length);
-   add_jump(compiler, backtracks, CMP(SLJIT_GREATER, STR_PTR, 0, STR_END, 0));
-   context.sourcereg = -1;
- #if defined SLJIT_UNALIGNED && SLJIT_UNALIGNED
-   context.ucharptr = 0;
- #endif
-   do
-     {
-     cc = byte_sequence_compare(common, *cc == OP_CHARI, cc + 1, &context, backtracks);
-     }
-   while (context.length > 0);
-   return cc;
- }
- /* Forward declarations of static functions defined elsewhere in this file. */
- static void compile_matchingpath(compiler_common *, PCRE2_SPTR, PCRE2_SPTR, backtrack_common *);
- static void compile_backtrackingpath(compiler_common *, struct backtrack_common *);
- /* Allocates a zero-filled backtrack record of 'size' bytes from the compiler's
-    memory, stores 'ccstart' in its cc field and links it in as the new
-    parent->top. Expects 'compiler', 'backtrack' and 'parent' to be in scope
-    where the macro is used. If the compiler reports an error after the
-    allocation, the enclosing function returns 'error'. Every use of a macro
-    parameter in an expression is parenthesized. */
- #define PUSH_BACKTRACK(size, ccstart, error) \
-   do \
-     { \
-     backtrack = sljit_alloc_memory(compiler, (size)); \
-     if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler))) \
-       return error; \
-     memset(backtrack, 0, (size)); \
-     backtrack->prev = parent->top; \
-     backtrack->cc = (ccstart); \
-     parent->top = backtrack; \
-     } \
-   while (0)
- /* Variant of the backtrack-record push for functions returning void:
-    allocates a zero-filled record of 'size' bytes, stores 'ccstart' in its cc
-    field and links it in as the new parent->top. Expects 'compiler',
-    'backtrack' and 'parent' to be in scope where the macro is used. If the
-    compiler reports an error after the allocation, the enclosing function
-    returns. Every use of a macro parameter is parenthesized. */
- #define PUSH_BACKTRACK_NOVALUE(size, ccstart) \
-   do \
-     { \
-     backtrack = sljit_alloc_memory(compiler, (size)); \
-     if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler))) \
-       return; \
-     memset(backtrack, 0, (size)); \
-     backtrack->prev = parent->top; \
-     backtrack->cc = (ccstart); \
-     parent->top = backtrack; \
-     } \
-   while (0)
- #define BACKTRACK_AS(type) ((type *)backtrack) /* Casts the in-scope 'backtrack' pointer to a pointer to the given record type. */
- /* Emits the search over the name-table entries referenced by an OP_DNREF or
-    OP_DNREFI item at cc. TMP2 receives the address of an OVECTOR pair: that of
-    the first candidate whose start slot differs from OVECTOR(1), or that of
-    the last candidate when none does. If backtracks is not NULL and
-    common->unset_backref is clear, a last candidate whose start slot equals
-    OVECTOR(1) jumps to backtracks. */
- static void compile_dnref_search(compiler_common *common, PCRE2_SPTR cc, jump_list **backtracks)
- {
-   DEFINE_COMPILER;
-   int candidates = GET2(cc, 1 + IMM2_SIZE);
-   PCRE2_SPTR entry = common->name_table + GET2(cc, 1) * common->name_entry_size;
-   jump_list *selected = NULL;
-   unsigned int ovec_index;
-   int i;
-   SLJIT_ASSERT(*cc == OP_DNREF || *cc == OP_DNREFI);
-   /* TMP1 = the value every start slot is compared against. */
-   OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(1));
-   /* All candidates but the last: leave the search at the first that differs. */
-   for (i = 1; i < candidates; i++, entry += common->name_entry_size)
-     {
-     ovec_index = GET2(entry, 0) << 1;
-     GET_LOCAL_BASE(TMP2, 0, OVECTOR(ovec_index));
-     add_jump(compiler, &selected, CMP(SLJIT_NOT_EQUAL, SLJIT_MEM1(SLJIT_SP), OVECTOR(ovec_index), TMP1, 0));
-     }
-   /* The last candidate is selected unconditionally. */
-   ovec_index = GET2(entry, 0) << 1;
-   GET_LOCAL_BASE(TMP2, 0, OVECTOR(ovec_index));
-   if (backtracks != NULL && !common->unset_backref)
-     add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, SLJIT_MEM1(SLJIT_SP), OVECTOR(ovec_index), TMP1, 0));
-   set_jumps(selected, LABEL());
- }
- static void compile_ref_matchingpath(compiler_common *common, PCRE2_SPTR cc, jump_list **backtracks, BOOL withchecks, BOOL emptyfail)
- { /* Emits code that matches the subject at STR_PTR against a previously
-   captured substring. For OP_REF / OP_REFI the OVECTOR offset is read from
-   the pattern at cc. For any other opcode the address of the OVECTOR pair is
-   expected in TMP2, which is dereferenced below. Mismatches jump to
-   'backtracks'. withchecks adds the comparison against OVECTOR(1) (unless
-   common->unset_backref is set) and the test for an empty capture.
-   emptyfail selects whether an empty capture jumps to 'backtracks' or just
-   skips the comparison. */
- DEFINE_COMPILER;
- BOOL ref = (*cc == OP_REF || *cc == OP_REFI);
- int offset = 0;
- struct sljit_jump *jump = NULL;
- struct sljit_jump *partial;
- struct sljit_jump *nopartial;
- if (ref)
- {
- offset = GET2(cc, 1) << 1;
- OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset));
- /* OVECTOR(1) contains the "string begin - 1" constant. */
- if (withchecks && !common->unset_backref)
- add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(1)));
- }
- else
- OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP2), 0);
- #if defined SUPPORT_UNICODE
- if (common->utf && *cc == OP_REFI)
- {
- SLJIT_ASSERT(TMP1 == SLJIT_R0 && STR_PTR == SLJIT_R1);
- if (ref)
- OP1(SLJIT_MOV, SLJIT_R2, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1));
- else
- OP1(SLJIT_MOV, SLJIT_R2, 0, SLJIT_MEM1(TMP2), sizeof(sljit_sw));
- if (withchecks)
- jump = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_R2, 0);
- /* No free saved registers so save data on stack. */
- OP1(SLJIT_MOV, SLJIT_R3, 0, STR_END, 0); /* Arguments are now in R0..R3:
-   first slot of the pair, STR_PTR, second slot of the pair, STR_END. */
- sljit_emit_icall(compiler, SLJIT_CALL, SLJIT_RET(SW) | SLJIT_ARG1(SW) | SLJIT_ARG2(SW) | SLJIT_ARG3(SW) | SLJIT_ARG4(SW), SLJIT_IMM, SLJIT_FUNC_OFFSET(do_utf_caselesscmp));
- OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_RETURN_REG, 0);
- if (common->mode == PCRE2_JIT_COMPLETE)
- add_jump(compiler, backtracks, CMP(SLJIT_LESS_EQUAL, SLJIT_RETURN_REG, 0, SLJIT_IMM, 1)); /* Return values 0 and 1 both fail here. */
- else
- {
- OP2(SLJIT_SUB | SLJIT_SET_Z | SLJIT_SET_LESS, SLJIT_UNUSED, 0, SLJIT_RETURN_REG, 0, SLJIT_IMM, 1); /* Return value
-   below 1: backtrack. Exactly 1: fall into the partial-match handling with
-   STR_PTR moved to STR_END. Anything else: continue at nopartial. */
- add_jump(compiler, backtracks, JUMP(SLJIT_LESS));
- nopartial = JUMP(SLJIT_NOT_EQUAL);
- OP1(SLJIT_MOV, STR_PTR, 0, STR_END, 0);
- check_partial(common, FALSE);
- add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));
- JUMPHERE(nopartial);
- }
- }
- else
- #endif /* SUPPORT_UNICODE */
- {
- if (ref)
- OP2(SLJIT_SUB | SLJIT_SET_Z, TMP2, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1), TMP1, 0); /* TMP2 = second slot - first slot, i.e. the length to compare. */
- else
- OP2(SLJIT_SUB | SLJIT_SET_Z, TMP2, 0, SLJIT_MEM1(TMP2), sizeof(sljit_sw), TMP1, 0);
- if (withchecks)
- jump = JUMP(SLJIT_ZERO);
- OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
- partial = CMP(SLJIT_GREATER, STR_PTR, 0, STR_END, 0);
- if (common->mode == PCRE2_JIT_COMPLETE)
- add_jump(compiler, backtracks, partial);
- add_jump(compiler, *cc == OP_REF ? &common->casefulcmp : &common->caselesscmp, JUMP(SLJIT_FAST_CALL)); /* Fast call to the shared
-   compare routine; the next line treats a nonzero TMP2 afterwards as a
-   mismatch. */
- add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP2, 0, SLJIT_IMM, 0));
- if (common->mode != PCRE2_JIT_COMPLETE)
- {
- nopartial = JUMP(SLJIT_JUMP);
- JUMPHERE(partial);
- /* TMP2 -= STR_END - STR_PTR */
- OP2(SLJIT_SUB, TMP2, 0, TMP2, 0, STR_PTR, 0);
- OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, STR_END, 0);
- partial = CMP(SLJIT_EQUAL, TMP2, 0, SLJIT_IMM, 0);
- OP1(SLJIT_MOV, STR_PTR, 0, STR_END, 0);
- add_jump(compiler, *cc == OP_REF ? &common->casefulcmp : &common->caselesscmp, JUMP(SLJIT_FAST_CALL));
- add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP2, 0, SLJIT_IMM, 0));
- JUMPHERE(partial);
- check_partial(common, FALSE);
- add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));
- JUMPHERE(nopartial);
- }
- }
- if (jump != NULL)
- { /* 'jump' is the empty-capture jump, created only when withchecks is set. */
- if (emptyfail)
- add_jump(compiler, backtracks, jump);
- else
- JUMPHERE(jump);
- }
- }
- static SLJIT_INLINE PCRE2_SPTR compile_ref_iterator_matchingpath(compiler_common *common, PCRE2_SPTR cc, backtrack_common *parent)
- { /* Emits the matching path for a back reference that is followed by a
-   repeat operator (OP_CRSTAR .. OP_CRMINRANGE). A ref_iterator_backtrack
-   record is pushed onto 'parent'. min and max are decoded from the repeat
-   operator; the star and plus forms set max to 0, and the code below loops
-   without an upper bound in that case. Returns the pattern position after
-   the repeat operator, or NULL when the compiler reports an error after the
-   backtrack record is allocated. */
- DEFINE_COMPILER;
- BOOL ref = (*cc == OP_REF || *cc == OP_REFI);
- backtrack_common *backtrack;
- PCRE2_UCHAR type;
- int offset = 0;
- struct sljit_label *label;
- struct sljit_jump *zerolength;
- struct sljit_jump *jump = NULL;
- PCRE2_SPTR ccbegin = cc;
- int min = 0, max = 0;
- BOOL minimize;
- PUSH_BACKTRACK(sizeof(ref_iterator_backtrack), cc, NULL);
- if (ref)
- offset = GET2(cc, 1) << 1;
- else
- cc += IMM2_SIZE; /* The non-REF forms carry one more IMM2 operand; shifting cc lets the offsets below serve both layouts. */
- type = cc[1 + IMM2_SIZE];
- SLJIT_COMPILE_ASSERT((OP_CRSTAR & 0x1) == 0, crstar_opcode_must_be_even);
- minimize = (type & 0x1) != 0; /* The odd repeat opcodes are the OP_CRMIN* variants. */
- switch(type)
- {
- case OP_CRSTAR:
- case OP_CRMINSTAR:
- min = 0;
- max = 0;
- cc += 1 + IMM2_SIZE + 1;
- break;
- case OP_CRPLUS:
- case OP_CRMINPLUS:
- min = 1;
- max = 0;
- cc += 1 + IMM2_SIZE + 1;
- break;
- case OP_CRQUERY:
- case OP_CRMINQUERY:
- min = 0;
- max = 1;
- cc += 1 + IMM2_SIZE + 1;
- break;
- case OP_CRRANGE:
- case OP_CRMINRANGE:
- min = GET2(cc, 1 + IMM2_SIZE + 1);
- max = GET2(cc, 1 + IMM2_SIZE + 1 + IMM2_SIZE);
- cc += 1 + IMM2_SIZE + 1 + 2 * IMM2_SIZE;
- break;
- default:
- SLJIT_UNREACHABLE();
- break;
- }
- if (!minimize)
- { /* Non-minimizing repeat: the whole loop is emitted here and the function
-   returns at the end of this block. */
- if (min == 0)
- {
- allocate_stack(common, 2);
- if (ref)
- OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset));
- OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
- OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_IMM, 0);
- /* Temporary release of STR_PTR. */
- OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, sizeof(sljit_sw));
- /* Handles both invalid and empty cases. Since the minimum repeat,
- is zero the invalid case is basically the same as an empty case. */
- if (ref)
- zerolength = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1));
- else
- {
- compile_dnref_search(common, ccbegin, NULL);
- OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP2), 0);
- OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), POSSESSIVE1, TMP2, 0); /* Keep the OVECTOR address; it is reloaded into TMP2 at the loop label. */
- zerolength = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_MEM1(TMP2), sizeof(sljit_sw));
- }
- /* Restore if not zero length. */
- OP2(SLJIT_SUB, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, sizeof(sljit_sw));
- }
- else
- {
- allocate_stack(common, 1);
- if (ref)
- OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset));
- OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
- if (ref)
- {
- add_jump(compiler, &backtrack->topbacktracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(1)));
- zerolength = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1));
- }
- else
- {
- compile_dnref_search(common, ccbegin, &backtrack->topbacktracks);
- OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP2), 0);
- OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), POSSESSIVE1, TMP2, 0);
- zerolength = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_MEM1(TMP2), sizeof(sljit_sw));
- }
- }
- if (min > 1 || max > 1)
- OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), POSSESSIVE0, SLJIT_IMM, 0); /* POSSESSIVE0 serves as the iteration counter. */
- label = LABEL();
- if (!ref)
- OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), POSSESSIVE1);
- compile_ref_matchingpath(common, ccbegin, &backtrack->topbacktracks, FALSE, FALSE);
- if (min > 1 || max > 1)
- {
- OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), POSSESSIVE0);
- OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
- OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), POSSESSIVE0, TMP1, 0);
- if (min > 1)
- CMPTO(SLJIT_LESS, TMP1, 0, SLJIT_IMM, min, label);
- if (max > 1)
- {
- jump = CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, max);
- allocate_stack(common, 1);
- OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
- JUMPTO(SLJIT_JUMP, label);
- JUMPHERE(jump);
- }
- }
- if (max == 0)
- {
- /* Includes min > 1 case as well. */
- allocate_stack(common, 1);
- OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
- JUMPTO(SLJIT_JUMP, label);
- }
- JUMPHERE(zerolength);
- BACKTRACK_AS(ref_iterator_backtrack)->matchingpath = LABEL();
- count_match(common);
- return cc;
- }
- allocate_stack(common, ref ? 2 : 3); /* Minimizing repeat. Stack slot 0 starts
-   at 0 and later receives STR_PTR, slot 1 is the repeat counter, and slot 2
-   (non-REF forms only) keeps the OVECTOR address found by the search. */
- if (ref)
- OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset));
- OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
- if (type != OP_CRMINSTAR)
- OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_IMM, 0);
- if (min == 0)
- {
- /* Handles both invalid and empty cases. Since the minimum repeat,
- is zero the invalid case is basically the same as an empty case. */
- if (ref)
- zerolength = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1));
- else
- {
- compile_dnref_search(common, ccbegin, NULL);
- OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP2), 0);
- OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(2), TMP2, 0);
- zerolength = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_MEM1(TMP2), sizeof(sljit_sw));
- }
- /* Length is non-zero, we can match real repeats. */
- OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
- jump = JUMP(SLJIT_JUMP);
- }
- else
- {
- if (ref)
- {
- add_jump(compiler, &backtrack->topbacktracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(1)));
- zerolength = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1));
- }
- else
- {
- compile_dnref_search(common, ccbegin, &backtrack->topbacktracks);
- OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP2), 0);
- OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(2), TMP2, 0);
- zerolength = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_MEM1(TMP2), sizeof(sljit_sw));
- }
- }
- BACKTRACK_AS(ref_iterator_backtrack)->matchingpath = LABEL(); /* Entry point for one more repetition. */
- if (max > 0)
- add_jump(compiler, &backtrack->topbacktracks, CMP(SLJIT_GREATER_EQUAL, SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_IMM, max));
- if (!ref)
- OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), STACK(2));
- compile_ref_matchingpath(common, ccbegin, &backtrack->topbacktracks, TRUE, TRUE);
- OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
- if (min > 1)
- {
- OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(1));
- OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
- OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), TMP1, 0);
- CMPTO(SLJIT_LESS, TMP1, 0, SLJIT_IMM, min, BACKTRACK_AS(ref_iterator_backtrack)->matchingpath);
- }
- else if (max > 0)
- OP2(SLJIT_ADD, SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_IMM, 1);
- if (jump != NULL)
- JUMPHERE(jump);
- JUMPHERE(zerolength);
- count_match(common);
- return cc;
- }
- /* Emits the matching path for the recursion item at cc and pushes a
-    recurse_backtrack record onto 'parent'. When get_framesize() reports
-    no_stack for the target, its body is compiled inline. Otherwise a
-    recurse_entry for the target offset is looked up in common->entries (and
-    appended if missing) and a fast call to it is emitted. Returns the pattern
-    position after the item, or NULL when the compiler reports an error after
-    an allocation. */
- static SLJIT_INLINE PCRE2_SPTR compile_recurse_matchingpath(compiler_common *common, PCRE2_SPTR cc, backtrack_common *parent)
- {
-   DEFINE_COMPILER;
-   backtrack_common *backtrack;
-   recurse_entry *entry = common->entries;
-   recurse_entry *last_seen = NULL;
-   sljit_sw start = GET(cc, 1);
-   PCRE2_SPTR target;
-   BOOL needs_control_head;
-   PUSH_BACKTRACK(sizeof(recurse_backtrack), cc, NULL);
-   target = common->start + start;
-   /* Inlining simple patterns. */
-   if (get_framesize(common, target, NULL, TRUE, &needs_control_head) == no_stack)
-     {
-     compile_matchingpath(common, next_opcode(common, target), bracketend(target) - (1 + LINK_SIZE), backtrack);
-     BACKTRACK_AS(recurse_backtrack)->inlined_pattern = TRUE;
-     return cc + 1 + LINK_SIZE;
-     }
-   /* Find the entry for this target; last_seen ends up on the list tail when
-      there is none. */
-   for (; entry != NULL; last_seen = entry, entry = entry->next)
-     {
-     if (entry->start == start)
-       break;
-     }
-   if (entry == NULL)
-     {
-     entry = sljit_alloc_memory(compiler, sizeof(recurse_entry));
-     if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
-       return NULL;
-     entry->start = start;
-     entry->next = NULL;
-     entry->entry_label = NULL;
-     entry->entry_calls = NULL;
-     entry->backtrack_label = NULL;
-     entry->backtrack_calls = NULL;
-     if (last_seen == NULL)
-       common->entries = entry;
-     else
-       last_seen->next = entry;
-     }
-   BACKTRACK_AS(recurse_backtrack)->entry = entry;
-   /* Call the entry directly if its label exists already; otherwise queue
-      the call on the entry's entry_calls list. */
-   if (entry->entry_label != NULL)
-     JUMPTO(SLJIT_FAST_CALL, entry->entry_label);
-   else
-     add_jump(compiler, &entry->entry_calls, JUMP(SLJIT_FAST_CALL));
-   /* Leave if the match is failed. */
-   add_jump(compiler, &backtrack->topbacktracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, 0));
-   BACKTRACK_AS(recurse_backtrack)->matchingpath = LABEL();
-   return cc + 1 + LINK_SIZE;
- }
- /* Completes a callout block and invokes the user's callout function.
-    Returns 0 without doing anything when no callout function is set;
-    otherwise returns the callout function's result. On entry
-    callout_block->capture_top holds the number of ovector pairs and
-    callout_block->offset_vector holds the current subject pointer; both are
-    replaced by their final values here. The converted offset vector is
-    written to the memory directly following the callout block. */
- static sljit_s32 SLJIT_FUNC do_callout(struct jit_arguments *arguments, pcre2_callout_block *callout_block, PCRE2_SPTR *jit_ovector)
- {
-   PCRE2_SPTR begin;
-   PCRE2_SIZE *out;
-   PCRE2_SPTR *in;
-   sljit_u32 pairs_left, group_count;
-   if (arguments->callout == NULL)
-     return 0;
-   SLJIT_COMPILE_ASSERT(sizeof (PCRE2_SIZE) <= sizeof (sljit_sw), pcre2_size_must_be_lower_than_sljit_sw_size);
-   begin = arguments->begin;
-   out = (PCRE2_SIZE*)(callout_block + 1);
-   pairs_left = callout_block->capture_top;
-   SLJIT_ASSERT(pairs_left >= 1);
-   callout_block->version = 2;
-   callout_block->callout_flags = 0;
-   /* Subject description; positions are offsets from the subject start. */
-   callout_block->subject = begin;
-   callout_block->subject_length = arguments->end - arguments->begin;
-   callout_block->start_match = jit_ovector[0] - begin;
-   /* Must be read before offset_vector is overwritten below. */
-   callout_block->current_position = (PCRE2_SPTR)callout_block->offset_vector - begin;
-   callout_block->offset_vector = out;
-   callout_block->capture_top = 1;
-   /* Pair 0 is reported as unset. */
-   out[0] = PCRE2_UNSET;
-   out[1] = PCRE2_UNSET;
-   out += 2;
-   in = jit_ovector + 2;
-   /* Convert the remaining pointer pairs to offsets; capture_top ends up one
-      above the highest pair whose start offset is not PCRE2_UNSET. */
-   for (group_count = 2; --pairs_left != 0; group_count++, out += 2, in += 2)
-     {
-     out[0] = (PCRE2_SIZE)(in[0] - begin);
-     out[1] = (PCRE2_SIZE)(in[1] - begin);
-     if (out[0] != PCRE2_UNSET)
-       callout_block->capture_top = group_count;
-     }
-   return (arguments->callout)(callout_block, arguments->callout_data);
- }
- #define CALLOUT_ARG_OFFSET(arg) \
- SLJIT_OFFSETOF(pcre2_callout_block, arg) /* SLJIT_OFFSETOF applied to member 'arg' of pcre2_callout_block. */
- static SLJIT_INLINE PCRE2_SPTR compile_callout_matchingpath(compiler_common *common, PCRE2_SPTR cc, backtrack_common *parent)
- {
- DEFINE_COMPILER;
- backtrack_common *backtrack;
- sljit_s32 mov_opcode;
- unsigned int callout_length = (*cc == OP_CALLOUT)
- ? PRIV(OP_lengths)[OP_CALLOUT] : GET(cc, 1 + 2 * LINK_SIZE);
- sljit_sw value1;
- sljit_sw value2;
- sljit_sw value3;
- sljit_uw callout_arg_size = (common->re->top_bracket + 1) * 2 * sizeof(sljit_sw);
- PUSH_BACKTRACK(sizeof(backtrack_common), cc, NULL);
- callout_arg_size = (sizeof(pcre2_callout_block) + callout_arg_size + sizeof(sljit_sw) - 1) / sizeof(sljit_sw);
- allocate_stack(common, callout_arg_size);
- SLJIT_ASSERT(common->capture_last_ptr != 0);
- OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), common->capture_last_ptr);
- OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
- value1 = (*cc == OP_CALLOUT) ? cc[1 + 2 * LINK_SIZE] : 0;
- OP1(SLJIT_MOV_U32, SLJIT_MEM1(STACK_TOP), CALLOUT_ARG_OFFSET(callout_number), SLJIT_IMM, value1);
- OP1(SLJIT_MOV_U32, SLJIT_MEM1(STACK_TOP), CALLOUT_ARG_OFFSET(capture_last), TMP2, 0);
- OP1(SLJIT_MOV_U32, SLJIT_MEM1(STACK_TOP), CALLOUT_ARG_OFFSET(capture_top), SLJIT_IMM, common->re->top_bracket + 1);
- /* These pointer sized fields temporarly stores internal variables. */
- OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), CALLOUT_ARG_OFFSET(offset_vector), STR_PTR, 0);
- if (common->mark_ptr != 0)
- OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, mark_ptr));
- mov_opcode = (sizeof(PCRE2_SIZE) == 4) ? SLJIT_MOV_U32 : SLJIT_MOV;
- OP1(mov_opcode, SLJIT_MEM1(STACK_TOP), CALLOUT_ARG_OFFSET(pattern_position), SLJIT_IMM, GET(cc, 1));
- OP1(mov_opcode, SLJIT_MEM1(STACK_TOP), CALLOUT_ARG_OFFSET(next_item_length), SLJIT_IMM, GET(cc, 1 + LINK_SIZE));
- if (*cc == OP_CALLOUT)
- {
- value1 = 0;
- value2 = 0;
- value3 = 0;
- }
- else
- {
- value1 = (sljit_sw) (cc + (1 + 4*LINK_SIZE) + 1);
- value2 = (callout_length - (1 + 4*LINK_SIZE + 2));
- value3 = (sljit_sw) (GET(cc, 1 + 3*LINK_SIZE));
- }
- OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), CALLOUT_ARG_OFFSET(callout_string), SLJIT_IMM, value1);
- OP1(mov_opcode, SLJIT_MEM1(STACK_TOP), CALLOUT_ARG_OFFSET(callout_string_length), SLJIT_IMM, value2);
- OP1(mov_opcode, SLJIT_MEM1(STACK_TOP), CALLOUT_ARG_OFFSET(callout_string_offset), SLJIT_IMM, value3);
- OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), CALLOUT_ARG_OFFSET(mark), (common->mark_ptr != 0) ? TMP2 : SLJIT_IMM, 0);
- SLJIT_ASSERT(TMP1 == SLJIT_R0 && STR_PTR == SLJIT_R1);
- /* Needed to save important temporary registers. */
- OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), LOCALS0, STR_PTR, 0);
- /* SLJIT_R0 = arguments */
- OP1(SLJIT_MOV, SLJIT_R1, 0, STACK_TOP, 0);
- GET_LOCAL_BASE(SLJIT_R2, 0, OVECTOR_START);
- sljit_emit_icall(compiler, SLJIT_CALL, SLJIT_RET(S32) | SLJIT_ARG1(SW) | SLJIT_ARG2(SW) | SLJIT_ARG3(SW), SLJIT_IMM, SLJIT_FUNC_OFFSET(do_callout));
- OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP), LOCALS0);
- free_stack(common, callout_arg_size);
- /* Check return value. */
- OP2(SLJIT_SUB32 | SLJIT_SET_Z | SLJIT_SET_SIG_GREATER, SLJIT_UNUSED, 0, SLJIT_RETURN_REG, 0, SLJIT_IMM, 0);
- add_jump(compiler, &backtrack->topbacktracks, JUMP(SLJIT_SIG_GREATER32));
- if (common->abort_label == NULL)
- add_jump(compiler, &common->abort, JUMP(SLJIT_NOT_EQUAL32) /* SIG_LESS */);
- else
- JUMPTO(SLJIT_NOT_EQUAL32 /* SIG_LESS */, common->abort_label);
- return cc + callout_length;
- }
- #undef CALLOUT_ARG_SIZE
- #undef CALLOUT_ARG_OFFSET
- static SLJIT_INLINE BOOL assert_needs_str_ptr_saving(PCRE2_SPTR cc)
- {
- while (TRUE)
- {
- switch (*cc)
- {
- case OP_CALLOUT_STR:
- cc += GET(cc, 1 + 2*LINK_SIZE);
- break;
- case OP_NOT_WORD_BOUNDARY:
- case OP_WORD_BOUNDARY:
- case OP_CIRC:
- case OP_CIRCM:
- case OP_DOLL:
- case OP_DOLLM:
- case OP_CALLOUT:
- case OP_ALT:
- cc += PRIV(OP_lengths)[*cc];
- break;
- case OP_KET:
- return FALSE;
- default:
- return TRUE;
- }
- }
- }
/* Emits the matching-path code for an assertion group.

   common       - shared compiler state; its quit / accept / then_trap /
                  positive-assertion fields are saved on entry and restored
                  before every return.
   cc           - points to the assertion opcode (OP_ASSERT .. OP_ASSERTBACK_NOT),
                  optionally preceded by OP_BRAZERO or OP_BRAMINZERO.
   backtrack    - assertion backtrack record; framesize and private_data_ptr
                  are filled in, and matchingpath is set for OP_BRAZERO.
   conditional  - when TRUE, failure jumps are collected in
                  backtrack->condfailed instead of
                  backtrack->common.topbacktracks; a BRAZERO / BRAMINZERO
                  prefix is asserted not to occur in that case.

   Returns the pattern pointer advanced past the item that ends the last
   alternative (cc + 1 + LINK_SIZE), or NULL when the sljit compiler reports
   an error. */
- static PCRE2_SPTR compile_assert_matchingpath(compiler_common *common, PCRE2_SPTR cc, assert_backtrack *backtrack, BOOL conditional)
- {
- DEFINE_COMPILER;
- int framesize;
- int extrasize;
- BOOL local_quit_available = FALSE;
- BOOL needs_control_head;
- int private_data_ptr;
- backtrack_common altbacktrack;
- PCRE2_SPTR ccbegin;
- PCRE2_UCHAR opcode;
- PCRE2_UCHAR bra = OP_BRA;
- jump_list *tmp = NULL;
- jump_list **target = (conditional) ? &backtrack->condfailed : &backtrack->common.topbacktracks;
- jump_list **found;
- /* Saving previous accept variables. */
- BOOL save_local_quit_available = common->local_quit_available;
- BOOL save_in_positive_assertion = common->in_positive_assertion;
- then_trap_backtrack *save_then_trap = common->then_trap;
- struct sljit_label *save_quit_label = common->quit_label;
- struct sljit_label *save_accept_label = common->accept_label;
- jump_list *save_quit = common->quit;
- jump_list *save_positive_assertion_quit = common->positive_assertion_quit;
- jump_list *save_accept = common->accept;
- struct sljit_jump *jump;
- struct sljit_jump *brajump = NULL;
- /* Assert captures then. */
- common->then_trap = NULL;
/* An optional OP_BRAZERO / OP_BRAMINZERO prefix is remembered in bra and skipped. */
- if (*cc == OP_BRAZERO || *cc == OP_BRAMINZERO)
- {
- SLJIT_ASSERT(!conditional);
- bra = *cc;
- cc++;
- }
- private_data_ptr = PRIVATE_DATA(cc);
- SLJIT_ASSERT(private_data_ptr != 0);
- framesize = get_framesize(common, cc, NULL, FALSE, &needs_control_head);
- backtrack->framesize = framesize;
- backtrack->private_data_ptr = private_data_ptr;
- opcode = *cc;
- SLJIT_ASSERT(opcode >= OP_ASSERT && opcode <= OP_ASSERTBACK_NOT);
/* Jumps taken when an alternative matches: collected in tmp for positive
   assertions, sent directly to the failure target for negative ones. */
- found = (opcode == OP_ASSERT || opcode == OP_ASSERTBACK) ? &tmp : target;
/* ccbegin marks the start of the current alternative, cc its end. */
- ccbegin = cc;
- cc += GET(cc, 1);
- if (bra == OP_BRAMINZERO)
- {
- /* This is a braminzero backtrack path. */
- OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
- free_stack(common, 1);
- brajump = CMP(SLJIT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0);
- }
/* framesize < 0: no frame is built; at most STR_PTR and the control head
   are pushed, and extrasize counts those stack slots. */
- if (framesize < 0)
- {
- extrasize = 1;
- if (bra == OP_BRA && !assert_needs_str_ptr_saving(ccbegin + 1 + LINK_SIZE))
- extrasize = 0;
- if (needs_control_head)
- extrasize++;
- if (framesize == no_frame)
- OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, STACK_TOP, 0);
- if (extrasize > 0)
- allocate_stack(common, extrasize);
- if (needs_control_head)
- OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr);
- if (extrasize > 0)
- OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
- if (needs_control_head)
- {
- SLJIT_ASSERT(extrasize == 2);
- OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, SLJIT_IMM, 0);
- OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), TMP1, 0);
- }
- }
- else
- {
/* A frame is built: reserve room for it plus the extra slots, store the
   new frame top in private_data_ptr and push the previous value. */
- extrasize = needs_control_head ? 3 : 2;
- allocate_stack(common, framesize + extrasize);
- OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
- OP2(SLJIT_ADD, TMP2, 0, STACK_TOP, 0, SLJIT_IMM, (framesize + extrasize) * sizeof(sljit_sw));
- OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, TMP2, 0);
- if (needs_control_head)
- OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr);
- OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
- if (needs_control_head)
- {
- OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(2), TMP1, 0);
- OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), TMP2, 0);
- OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, SLJIT_IMM, 0);
- }
- else
- OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), TMP1, 0);
- init_frame(common, ccbegin, NULL, framesize + extrasize - 1, extrasize);
- }
- memset(&altbacktrack, 0, sizeof(backtrack_common));
- if (conditional || (opcode == OP_ASSERT_NOT || opcode == OP_ASSERTBACK_NOT))
- {
- /* Control verbs cannot escape from these asserts. */
- local_quit_available = TRUE;
- common->local_quit_available = TRUE;
- common->quit_label = NULL;
- common->quit = NULL;
- }
- common->in_positive_assertion = (opcode == OP_ASSERT || opcode == OP_ASSERTBACK);
- common->positive_assertion_quit = NULL;
/* One iteration per alternative; the loop ends when the item following the
   current alternative is not OP_ALT. */
- while (1)
- {
- common->accept_label = NULL;
- common->accept = NULL;
- altbacktrack.top = NULL;
- altbacktrack.topbacktracks = NULL;
/* Later alternatives restart from the saved STR_PTR (if one was pushed). */
- if (*ccbegin == OP_ALT && extrasize > 0)
- OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
- altbacktrack.cc = ccbegin;
- compile_matchingpath(common, ccbegin + 1 + LINK_SIZE, cc, &altbacktrack);
- if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
- {
/* Compiler error: restore the saved state and give up. */
- if (local_quit_available)
- {
- common->local_quit_available = save_local_quit_available;
- common->quit_label = save_quit_label;
- common->quit = save_quit;
- }
- common->in_positive_assertion = save_in_positive_assertion;
- common->then_trap = save_then_trap;
- common->accept_label = save_accept_label;
- common->positive_assertion_quit = save_positive_assertion_quit;
- common->accept = save_accept;
- return NULL;
- }
/* The alternative matched: pending accept jumps land here. */
- common->accept_label = LABEL();
- if (common->accept != NULL)
- set_jumps(common->accept, common->accept_label);
- /* Reset stack. */
- if (framesize < 0)
- {
- if (framesize == no_frame)
- OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
- else if (extrasize > 0)
- free_stack(common, extrasize);
- if (needs_control_head)
- OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, SLJIT_MEM1(STACK_TOP), STACK(-1));
- }
- else
- {
- if ((opcode != OP_ASSERT_NOT && opcode != OP_ASSERTBACK_NOT) || conditional)
- {
- /* We don't need to keep the STR_PTR, only the previous private_data_ptr. */
- OP2(SLJIT_SUB, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr, SLJIT_IMM, (framesize + 1) * sizeof(sljit_sw));
- if (needs_control_head)
- OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, SLJIT_MEM1(STACK_TOP), STACK(-1));
- }
- else
- {
- OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
- if (needs_control_head)
- OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, SLJIT_MEM1(STACK_TOP), STACK(-framesize - 2));
- add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL));
- OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, (framesize - 1) * sizeof(sljit_sw));
- }
- }
- if (opcode == OP_ASSERT_NOT || opcode == OP_ASSERTBACK_NOT)
- {
- /* We know that STR_PTR was stored on the top of the stack. */
- if (conditional)
- {
- if (extrasize > 0)
- OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), needs_control_head ? STACK(-2) : STACK(-1));
- }
- else if (bra == OP_BRAZERO)
- {
- if (framesize < 0)
- OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(-extrasize));
- else
- {
- OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(-framesize - 1));
- OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(-framesize - extrasize));
- OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, TMP1, 0);
- }
- OP2(SLJIT_SUB, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, sizeof(sljit_sw));
- OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
- }
- else if (framesize >= 0)
- {
- /* For OP_BRA and OP_BRAMINZERO. */
- OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, SLJIT_MEM1(STACK_TOP), STACK(-framesize - 1));
- }
- }
/* Leave through the "alternative matched" list chosen above, then emit the
   backtracking code of this alternative. */
- add_jump(compiler, found, JUMP(SLJIT_JUMP));
- compile_backtrackingpath(common, altbacktrack.top);
- if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
- {
/* Compiler error: restore the saved state and give up. */
- if (local_quit_available)
- {
- common->local_quit_available = save_local_quit_available;
- common->quit_label = save_quit_label;
- common->quit = save_quit;
- }
- common->in_positive_assertion = save_in_positive_assertion;
- common->then_trap = save_then_trap;
- common->accept_label = save_accept_label;
- common->positive_assertion_quit = save_positive_assertion_quit;
- common->accept = save_accept;
- return NULL;
- }
- set_jumps(altbacktrack.topbacktracks, LABEL());
- if (*cc != OP_ALT)
- break;
/* Advance to the next alternative. */
- ccbegin = cc;
- cc += GET(cc, 1);
- }
- if (local_quit_available)
- {
- SLJIT_ASSERT(common->positive_assertion_quit == NULL);
- /* Makes the check less complicated below. */
- common->positive_assertion_quit = common->quit;
- }
- /* None of them matched. */
- if (common->positive_assertion_quit != NULL)
- {
/* The normal fall-through path skips the quit handler emitted below. */
- jump = JUMP(SLJIT_JUMP);
- set_jumps(common->positive_assertion_quit, LABEL());
- SLJIT_ASSERT(framesize != no_stack);
- if (framesize < 0)
- OP2(SLJIT_SUB, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr, SLJIT_IMM, extrasize * sizeof(sljit_sw));
- else
- {
- OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
- add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL));
- OP2(SLJIT_SUB, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, (extrasize + 1) * sizeof(sljit_sw));
- }
- JUMPHERE(jump);
- }
- if (needs_control_head)
- OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, SLJIT_MEM1(STACK_TOP), STACK(1));
- if (opcode == OP_ASSERT || opcode == OP_ASSERTBACK)
- {
- /* Assert is failed. */
- if ((conditional && extrasize > 0) || bra == OP_BRAZERO)
- OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
- if (framesize < 0)
- {
- /* The topmost item should be 0. */
- if (bra == OP_BRAZERO)
- {
- if (extrasize == 2)
- free_stack(common, 1);
- OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
- }
- else if (extrasize > 0)
- free_stack(common, extrasize);
- }
- else
- {
- OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(extrasize - 1));
- /* The topmost item should be 0. */
- if (bra == OP_BRAZERO)
- {
- free_stack(common, framesize + extrasize - 1);
- OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
- }
- else
- free_stack(common, framesize + extrasize);
- OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, TMP1, 0);
- }
/* With OP_BRAZERO the failure jump is bound to matchingpath further below;
   otherwise it joins the failure target list. */
- jump = JUMP(SLJIT_JUMP);
- if (bra != OP_BRAZERO)
- add_jump(compiler, target, jump);
- /* Assert is successful. */
- set_jumps(tmp, LABEL());
- if (framesize < 0)
- {
- /* We know that STR_PTR was stored on the top of the stack. */
- if (extrasize > 0)
- OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(-extrasize));
- /* Keep the STR_PTR on the top of the stack. */
- if (bra == OP_BRAZERO)
- {
- OP2(SLJIT_SUB, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, sizeof(sljit_sw));
- if (extrasize == 2)
- OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
- }
- else if (bra == OP_BRAMINZERO)
- {
- OP2(SLJIT_SUB, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, sizeof(sljit_sw));
- OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
- }
- }
- else
- {
- if (bra == OP_BRA)
- {
- /* We don't need to keep the STR_PTR, only the previous private_data_ptr. */
- OP2(SLJIT_SUB, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr, SLJIT_IMM, (framesize + 1) * sizeof(sljit_sw));
- OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(-extrasize + 1));
- }
- else
- {
- /* We don't need to keep the STR_PTR, only the previous private_data_ptr. */
- OP2(SLJIT_SUB, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr, SLJIT_IMM, (framesize + 2) * sizeof(sljit_sw));
- if (extrasize == 2)
- {
- OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
- if (bra == OP_BRAMINZERO)
- OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
- }
- else
- {
- OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), 0);
- OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), bra == OP_BRAZERO ? STR_PTR : SLJIT_IMM, 0);
- }
- }
- }
- if (bra == OP_BRAZERO)
- {
/* Success and failure of the optional assertion both continue here. */
- backtrack->matchingpath = LABEL();
- SET_LABEL(jump, backtrack->matchingpath);
- }
- else if (bra == OP_BRAMINZERO)
- {
- JUMPTO(SLJIT_JUMP, backtrack->matchingpath);
- JUMPHERE(brajump);
- if (framesize >= 0)
- {
- OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
- add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL));
- OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(-2));
- OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, (framesize - 1) * sizeof(sljit_sw));
- OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, TMP1, 0);
- }
- set_jumps(backtrack->common.topbacktracks, LABEL());
- }
- }
- else
- {
- /* AssertNot is successful. */
- if (framesize < 0)
- {
- if (extrasize > 0)
- OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
- if (bra != OP_BRA)
- {
- if (extrasize == 2)
- free_stack(common, 1);
- OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
- }
- else if (extrasize > 0)
- free_stack(common, extrasize);
- }
- else
- {
- OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
- OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(extrasize - 1));
- /* The topmost item should be 0. */
- if (bra != OP_BRA)
- {
- free_stack(common, framesize + extrasize - 1);
- OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
- }
- else
- free_stack(common, framesize + extrasize);
- OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, TMP1, 0);
- }
- if (bra == OP_BRAZERO)
- backtrack->matchingpath = LABEL();
- else if (bra == OP_BRAMINZERO)
- {
- JUMPTO(SLJIT_JUMP, backtrack->matchingpath);
- JUMPHERE(brajump);
- }
- if (bra != OP_BRA)
- {
/* The failure jumps are resolved here, so the list is emptied for the caller. */
- SLJIT_ASSERT(found == &backtrack->common.topbacktracks);
- set_jumps(backtrack->common.topbacktracks, LABEL());
- backtrack->common.topbacktracks = NULL;
- }
- }
/* Restore the compiler state saved on entry. */
- if (local_quit_available)
- {
- common->local_quit_available = save_local_quit_available;
- common->quit_label = save_quit_label;
- common->quit = save_quit;
- }
- common->in_positive_assertion = save_in_positive_assertion;
- common->then_trap = save_then_trap;
- common->accept_label = save_accept_label;
- common->positive_assertion_quit = save_positive_assertion_quit;
- common->accept = save_accept;
- return cc + 1 + LINK_SIZE;
- }
- static SLJIT_INLINE void match_once_common(compiler_common *common, PCRE2_UCHAR ket, int framesize, int private_data_ptr, BOOL has_alternatives, BOOL needs_control_head)
- {
- DEFINE_COMPILER;
- int stacksize;
- if (framesize < 0)
- {
- if (framesize == no_frame)
- OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
- else
- {
- stacksize = needs_control_head ? 1 : 0;
- if (ket != OP_KET || has_alternatives)
- stacksize++;
- if (stacksize > 0)
- free_stack(common, stacksize);
- }
- if (needs_control_head)
- OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), (ket != OP_KET || has_alternatives) ? STACK(-2) : STACK(-1));
- /* TMP2 which is set here used by OP_KETRMAX below. */
- if (ket == OP_KETRMAX)
- OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), STACK(-1));
- else if (ket == OP_KETRMIN)
- {
- /* Move the STR_PTR to the private_data_ptr. */
- OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, SLJIT_MEM1(STACK_TOP), STACK(-1));
- }
- }
- else
- {
- stacksize = (ket != OP_KET || has_alternatives) ? 2 : 1;
- OP2(SLJIT_SUB, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr, SLJIT_IMM, (framesize + stacksize) * sizeof(sljit_sw));
- if (needs_control_head)
- OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(-1));
- if (ket == OP_KETRMAX)
- {
- /* TMP2 which is set here used by OP_KETRMAX below. */
- OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
- }
- }
- if (needs_control_head)
- OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, TMP1, 0);
- }
- static SLJIT_INLINE int match_capture_common(compiler_common *common, int stacksize, int offset, int private_data_ptr)
- {
- DEFINE_COMPILER;
- if (common->capture_last_ptr != 0)
- {
- OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->capture_last_ptr);
- OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->capture_last_ptr, SLJIT_IMM, offset >> 1);
- OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), TMP1, 0);
- stacksize++;
- }
- if (common->optimized_cbracket[offset >> 1] == 0)
- {
- OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset));
- OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1));
- OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), TMP1, 0);
- OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
- OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize + 1), TMP2, 0);
- OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1), STR_PTR, 0);
- OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset), TMP1, 0);
- stacksize += 2;
- }
- return stacksize;
- }
- /*
- Handling bracketed expressions is probably the most complex part.
- Stack layout naming characters:
- S - Push the current STR_PTR
- 0 - Push a 0 (NULL)
- A - Push the current STR_PTR. Needed for restoring the STR_PTR
- before the next alternative. Not pushed if there are no alternatives.
- M - Any values pushed by the current alternative. Can be empty, or anything.
- C - Push the previous OVECTOR(i), OVECTOR(i+1) and OVECTOR_PRIV(i) to the stack.
- L - Push the previous local (pointed to by localptr) to the stack
- () - optional values stored on the stack
- ()* - optional, can be stored multiple times
- The following list shows the regular expression templates, their PCRE byte codes
- and stack layout supported by pcre-sljit.
- (?:) OP_BRA | OP_KET A M
- () OP_CBRA | OP_KET C M
- (?:)+ OP_BRA | OP_KETRMAX 0 A M S ( A M S )*
- OP_SBRA | OP_KETRMAX 0 L M S ( L M S )*
- (?:)+? OP_BRA | OP_KETRMIN 0 A M S ( A M S )*
- OP_SBRA | OP_KETRMIN 0 L M S ( L M S )*
- ()+ OP_CBRA | OP_KETRMAX 0 C M S ( C M S )*
- OP_SCBRA | OP_KETRMAX 0 C M S ( C M S )*
- ()+? OP_CBRA | OP_KETRMIN 0 C M S ( C M S )*
- OP_SCBRA | OP_KETRMIN 0 C M S ( C M S )*
- (?:)? OP_BRAZERO | OP_BRA | OP_KET S ( A M 0 )
- (?:)?? OP_BRAMINZERO | OP_BRA | OP_KET S ( A M 0 )
- ()? OP_BRAZERO | OP_CBRA | OP_KET S ( C M 0 )
- ()?? OP_BRAMINZERO | OP_CBRA | OP_KET S ( C M 0 )
- (?:)* OP_BRAZERO | OP_BRA | OP_KETRMAX S 0 ( A M S )*
- OP_BRAZERO | OP_SBRA | OP_KETRMAX S 0 ( L M S )*
- (?:)*? OP_BRAMINZERO | OP_BRA | OP_KETRMIN S 0 ( A M S )*
- OP_BRAMINZERO | OP_SBRA | OP_KETRMIN S 0 ( L M S )*
- ()* OP_BRAZERO | OP_CBRA | OP_KETRMAX S 0 ( C M S )*
- OP_BRAZERO | OP_SCBRA | OP_KETRMAX S 0 ( C M S )*
- ()*? OP_BRAMINZERO | OP_CBRA | OP_KETRMIN S 0 ( C M S )*
- OP_BRAMINZERO | OP_SCBRA | OP_KETRMIN S 0 ( C M S )*
- Stack layout naming characters:
- A - Push the alternative index (starting from 0) on the stack.
- Not pushed if there are no alternatives.
- M - Any values pushed by the current alternative. Can be empty, or anything.
- The next list shows the possible content of a bracket:
- (|) OP_*BRA | OP_ALT ... M A
- (?()|) OP_*COND | OP_ALT M A
- (?>|) OP_ONCE | OP_ALT ... [stack trace] M A
- Or nothing, if trace is unnecessary
- */
- static PCRE2_SPTR compile_bracket_matchingpath(compiler_common *common, PCRE2_SPTR cc, backtrack_common *parent)
- {
- DEFINE_COMPILER;
- backtrack_common *backtrack;
- PCRE2_UCHAR opcode;
- int private_data_ptr = 0;
- int offset = 0;
- int i, stacksize;
- int repeat_ptr = 0, repeat_length = 0;
- int repeat_type = 0, repeat_count = 0;
- PCRE2_SPTR ccbegin;
- PCRE2_SPTR matchingpath;
- PCRE2_SPTR slot;
- PCRE2_UCHAR bra = OP_BRA;
- PCRE2_UCHAR ket;
- assert_backtrack *assert;
- BOOL has_alternatives;
- BOOL needs_control_head = FALSE;
- struct sljit_jump *jump;
- struct sljit_jump *skip;
- struct sljit_label *rmax_label = NULL;
- struct sljit_jump *braminzero = NULL;
- PUSH_BACKTRACK(sizeof(bracket_backtrack), cc, NULL);
- if (*cc == OP_BRAZERO || *cc == OP_BRAMINZERO)
- {
- bra = *cc;
- cc++;
- opcode = *cc;
- }
- opcode = *cc;
- ccbegin = cc;
- matchingpath = bracketend(cc) - 1 - LINK_SIZE;
- ket = *matchingpath;
- if (ket == OP_KET && PRIVATE_DATA(matchingpath) != 0)
- {
- repeat_ptr = PRIVATE_DATA(matchingpath);
- repeat_length = PRIVATE_DATA(matchingpath + 1);
- repeat_type = PRIVATE_DATA(matchingpath + 2);
- repeat_count = PRIVATE_DATA(matchingpath + 3);
- SLJIT_ASSERT(repeat_length != 0 && repeat_type != 0 && repeat_count != 0);
- if (repeat_type == OP_UPTO)
- ket = OP_KETRMAX;
- if (repeat_type == OP_MINUPTO)
- ket = OP_KETRMIN;
- }
- matchingpath = ccbegin + 1 + LINK_SIZE;
- SLJIT_ASSERT(ket == OP_KET || ket == OP_KETRMAX || ket == OP_KETRMIN);
- SLJIT_ASSERT(!((bra == OP_BRAZERO && ket == OP_KETRMIN) || (bra == OP_BRAMINZERO && ket == OP_KETRMAX)));
- cc += GET(cc, 1);
- has_alternatives = *cc == OP_ALT;
- if (SLJIT_UNLIKELY(opcode == OP_COND || opcode == OP_SCOND))
- {
- SLJIT_COMPILE_ASSERT(OP_DNRREF == OP_RREF + 1 && OP_FALSE == OP_RREF + 2 && OP_TRUE == OP_RREF + 3,
- compile_time_checks_must_be_grouped_together);
- has_alternatives = ((*matchingpath >= OP_RREF && *matchingpath <= OP_TRUE) || *matchingpath == OP_FAIL) ? FALSE : TRUE;
- }
- if (SLJIT_UNLIKELY(opcode == OP_COND) && (*cc == OP_KETRMAX || *cc == OP_KETRMIN))
- opcode = OP_SCOND;
- if (opcode == OP_CBRA || opcode == OP_SCBRA)
- {
- /* Capturing brackets has a pre-allocated space. */
- offset = GET2(ccbegin, 1 + LINK_SIZE);
- if (common->optimized_cbracket[offset] == 0)
- {
- private_data_ptr = OVECTOR_PRIV(offset);
- offset <<= 1;
- }
- else
- {
- offset <<= 1;
- private_data_ptr = OVECTOR(offset);
- }
- BACKTRACK_AS(bracket_backtrack)->private_data_ptr = private_data_ptr;
- matchingpath += IMM2_SIZE;
- }
- else if (opcode == OP_ONCE || opcode == OP_SBRA || opcode == OP_SCOND)
- {
- /* Other brackets simply allocate the next entry. */
- private_data_ptr = PRIVATE_DATA(ccbegin);
- SLJIT_ASSERT(private_data_ptr != 0);
- BACKTRACK_AS(bracket_backtrack)->private_data_ptr = private_data_ptr;
- if (opcode == OP_ONCE)
- BACKTRACK_AS(bracket_backtrack)->u.framesize = get_framesize(common, ccbegin, NULL, FALSE, &needs_control_head);
- }
- /* Instructions before the first alternative. */
- stacksize = 0;
- if (ket == OP_KETRMAX || (ket == OP_KETRMIN && bra != OP_BRAMINZERO))
- stacksize++;
- if (bra == OP_BRAZERO)
- stacksize++;
- if (stacksize > 0)
- allocate_stack(common, stacksize);
- stacksize = 0;
- if (ket == OP_KETRMAX || (ket == OP_KETRMIN && bra != OP_BRAMINZERO))
- {
- OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), SLJIT_IMM, 0);
- stacksize++;
- }
- if (bra == OP_BRAZERO)
- OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), STR_PTR, 0);
- if (bra == OP_BRAMINZERO)
- {
- /* This is a backtrack path! (Since the try-path of OP_BRAMINZERO matches to the empty string) */
- OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
- if (ket != OP_KETRMIN)
- {
- free_stack(common, 1);
- braminzero = CMP(SLJIT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0);
- }
- else
- {
- if (opcode == OP_ONCE || opcode >= OP_SBRA)
- {
- jump = CMP(SLJIT_NOT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0);
- OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(1));
- /* Nothing stored during the first run. */
- skip = JUMP(SLJIT_JUMP);
- JUMPHERE(jump);
- /* Checking zero-length iteration. */
- if (opcode != OP_ONCE || BACKTRACK_AS(bracket_backtrack)->u.framesize < 0)
- {
- /* When we come from outside, private_data_ptr contains the previous STR_PTR. */
- braminzero = CMP(SLJIT_EQUAL, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
- }
- else
- {
- /* Except when the whole stack frame must be saved. */
- OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
- braminzero = CMP(SLJIT_EQUAL, STR_PTR, 0, SLJIT_MEM1(TMP1), STACK(-BACKTRACK_AS(bracket_backtrack)->u.framesize - 2));
- }
- JUMPHERE(skip);
- }
- else
- {
- jump = CMP(SLJIT_NOT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0);
- OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(1));
- JUMPHERE(jump);
- }
- }
- }
- if (repeat_type != 0)
- {
- OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), repeat_ptr, SLJIT_IMM, repeat_count);
- if (repeat_type == OP_EXACT)
- rmax_label = LABEL();
- }
- if (ket == OP_KETRMIN)
- BACKTRACK_AS(bracket_backtrack)->recursive_matchingpath = LABEL();
- if (ket == OP_KETRMAX)
- {
- rmax_label = LABEL();
- if (has_alternatives && opcode != OP_ONCE && opcode < OP_SBRA && repeat_type == 0)
- BACKTRACK_AS(bracket_backtrack)->alternative_matchingpath = rmax_label;
- }
- /* Handling capturing brackets and alternatives. */
- if (opcode == OP_ONCE)
- {
- stacksize = 0;
- if (needs_control_head)
- {
- OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr);
- stacksize++;
- }
- if (BACKTRACK_AS(bracket_backtrack)->u.framesize < 0)
- {
- /* Neither capturing brackets nor recursions are found in the block. */
- if (ket == OP_KETRMIN)
- {
- stacksize += 2;
- if (!needs_control_head)
- OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
- }
- else
- {
- if (BACKTRACK_AS(bracket_backtrack)->u.framesize == no_frame)
- OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, STACK_TOP, 0);
- if (ket == OP_KETRMAX || has_alternatives)
- stacksize++;
- }
- if (stacksize > 0)
- allocate_stack(common, stacksize);
- stacksize = 0;
- if (needs_control_head)
- {
- stacksize++;
- OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP2, 0);
- }
- if (ket == OP_KETRMIN)
- {
- if (needs_control_head)
- OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
- OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), STR_PTR, 0);
- if (BACKTRACK_AS(bracket_backtrack)->u.framesize == no_frame)
- OP2(SLJIT_ADD, SLJIT_MEM1(SLJIT_SP), private_data_ptr, STACK_TOP, 0, SLJIT_IMM, needs_control_head ? (2 * sizeof(sljit_sw)) : sizeof(sljit_sw));
- OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize + 1), TMP2, 0);
- }
- else if (ket == OP_KETRMAX || has_alternatives)
- OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), STR_PTR, 0);
- }
- else
- {
- if (ket != OP_KET || has_alternatives)
- stacksize++;
- stacksize += BACKTRACK_AS(bracket_backtrack)->u.framesize + 1;
- allocate_stack(common, stacksize);
- if (needs_control_head)
- OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP2, 0);
- OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
- OP2(SLJIT_ADD, TMP2, 0, STACK_TOP, 0, SLJIT_IMM, stacksize * sizeof(sljit_sw));
- stacksize = needs_control_head ? 1 : 0;
- if (ket != OP_KET || has_alternatives)
- {
- OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), STR_PTR, 0);
- OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, TMP2, 0);
- stacksize++;
- OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), TMP1, 0);
- }
- else
- {
- OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, TMP2, 0);
- OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), TMP1, 0);
- }
- init_frame(common, ccbegin, NULL, BACKTRACK_AS(bracket_backtrack)->u.framesize + stacksize, stacksize + 1);
- }
- }
- else if (opcode == OP_CBRA || opcode == OP_SCBRA)
- {
- /* Saving the previous values. */
- if (common->optimized_cbracket[offset >> 1] != 0)
- {
- SLJIT_ASSERT(private_data_ptr == OVECTOR(offset));
- allocate_stack(common, 2);
- OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
- OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr + sizeof(sljit_sw));
- OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, STR_PTR, 0);
- OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP1, 0);
- OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), TMP2, 0);
- }
- else
- {
- OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
- allocate_stack(common, 1);
- OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, STR_PTR, 0);
- OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP2, 0);
- }
- }
- else if (opcode == OP_SBRA || opcode == OP_SCOND)
- {
- /* Saving the previous value. */
- OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
- allocate_stack(common, 1);
- OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, STR_PTR, 0);
- OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP2, 0);
- }
- else if (has_alternatives)
- {
- /* Pushing the starting string pointer. */
- allocate_stack(common, 1);
- OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
- }
- /* Generating code for the first alternative. */
- if (opcode == OP_COND || opcode == OP_SCOND)
- {
- if (*matchingpath == OP_CREF)
- {
- SLJIT_ASSERT(has_alternatives);
- add_jump(compiler, &(BACKTRACK_AS(bracket_backtrack)->u.condfailed),
- CMP(SLJIT_EQUAL, SLJIT_MEM1(SLJIT_SP), OVECTOR(GET2(matchingpath, 1) << 1), SLJIT_MEM1(SLJIT_SP), OVECTOR(1)));
- matchingpath += 1 + IMM2_SIZE;
- }
- else if (*matchingpath == OP_DNCREF)
- {
- SLJIT_ASSERT(has_alternatives);
- i = GET2(matchingpath, 1 + IMM2_SIZE);
- slot = common->name_table + GET2(matchingpath, 1) * common->name_entry_size;
- OP1(SLJIT_MOV, TMP3, 0, STR_PTR, 0);
- OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(1));
- OP2(SLJIT_SUB | SLJIT_SET_Z, TMP2, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(GET2(slot, 0) << 1), TMP1, 0);
- slot += common->name_entry_size;
- i--;
- while (i-- > 0)
- {
- OP2(SLJIT_SUB, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(GET2(slot, 0) << 1), TMP1, 0);
- OP2(SLJIT_OR | SLJIT_SET_Z, TMP2, 0, TMP2, 0, STR_PTR, 0);
- slot += common->name_entry_size;
- }
- OP1(SLJIT_MOV, STR_PTR, 0, TMP3, 0);
- add_jump(compiler, &(BACKTRACK_AS(bracket_backtrack)->u.condfailed), JUMP(SLJIT_ZERO));
- matchingpath += 1 + 2 * IMM2_SIZE;
- }
- else if ((*matchingpath >= OP_RREF && *matchingpath <= OP_TRUE) || *matchingpath == OP_FAIL)
- {
- /* Never has other case. */
- BACKTRACK_AS(bracket_backtrack)->u.condfailed = NULL;
- SLJIT_ASSERT(!has_alternatives);
- if (*matchingpath == OP_TRUE)
- {
- stacksize = 1;
- matchingpath++;
- }
- else if (*matchingpath == OP_FALSE || *matchingpath == OP_FAIL)
- stacksize = 0;
- else if (*matchingpath == OP_RREF)
- {
- stacksize = GET2(matchingpath, 1);
- if (common->currententry == NULL)
- stacksize = 0;
- else if (stacksize == RREF_ANY)
- stacksize = 1;
- else if (common->currententry->start == 0)
- stacksize = stacksize == 0;
- else
- stacksize = stacksize == (int)GET2(common->start, common->currententry->start + 1 + LINK_SIZE);
- if (stacksize != 0)
- matchingpath += 1 + IMM2_SIZE;
- }
- else
- {
- if (common->currententry == NULL || common->currententry->start == 0)
- stacksize = 0;
- else
- {
- stacksize = GET2(matchingpath, 1 + IMM2_SIZE);
- slot = common->name_table + GET2(matchingpath, 1) * common->name_entry_size;
- i = (int)GET2(common->start, common->currententry->start + 1 + LINK_SIZE);
- while (stacksize > 0)
- {
- if ((int)GET2(slot, 0) == i)
- break;
- slot += common->name_entry_size;
- stacksize--;
- }
- }
- if (stacksize != 0)
- matchingpath += 1 + 2 * IMM2_SIZE;
- }
- /* The stacksize == 0 is a common "else" case. */
- if (stacksize == 0)
- {
- if (*cc == OP_ALT)
- {
- matchingpath = cc + 1 + LINK_SIZE;
- cc += GET(cc, 1);
- }
- else
- matchingpath = cc;
- }
- }
- else
- {
- SLJIT_ASSERT(has_alternatives && *matchingpath >= OP_ASSERT && *matchingpath <= OP_ASSERTBACK_NOT);
- /* Similar code as PUSH_BACKTRACK macro. */
- assert = sljit_alloc_memory(compiler, sizeof(assert_backtrack));
- if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
- return NULL;
- memset(assert, 0, sizeof(assert_backtrack));
- assert->common.cc = matchingpath;
- BACKTRACK_AS(bracket_backtrack)->u.assert = assert;
- matchingpath = compile_assert_matchingpath(common, matchingpath, assert, TRUE);
- }
- }
- compile_matchingpath(common, matchingpath, cc, backtrack);
- if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
- return NULL;
- if (opcode == OP_ONCE)
- match_once_common(common, ket, BACKTRACK_AS(bracket_backtrack)->u.framesize, private_data_ptr, has_alternatives, needs_control_head);
- stacksize = 0;
- if (repeat_type == OP_MINUPTO)
- {
- /* We need to preserve the counter. TMP2 will be used below. */
- OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), repeat_ptr);
- stacksize++;
- }
- if (ket != OP_KET || bra != OP_BRA)
- stacksize++;
- if (offset != 0)
- {
- if (common->capture_last_ptr != 0)
- stacksize++;
- if (common->optimized_cbracket[offset >> 1] == 0)
- stacksize += 2;
- }
- if (has_alternatives && opcode != OP_ONCE)
- stacksize++;
- if (stacksize > 0)
- allocate_stack(common, stacksize);
- stacksize = 0;
- if (repeat_type == OP_MINUPTO)
- {
- /* TMP2 was set above. */
- OP2(SLJIT_SUB, SLJIT_MEM1(STACK_TOP), STACK(stacksize), TMP2, 0, SLJIT_IMM, 1);
- stacksize++;
- }
- if (ket != OP_KET || bra != OP_BRA)
- {
- if (ket != OP_KET)
- OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), STR_PTR, 0);
- else
- OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), SLJIT_IMM, 0);
- stacksize++;
- }
- if (offset != 0)
- stacksize = match_capture_common(common, stacksize, offset, private_data_ptr);
- if (has_alternatives)
- {
- if (opcode != OP_ONCE)
- OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), SLJIT_IMM, 0);
- if (ket != OP_KETRMAX)
- BACKTRACK_AS(bracket_backtrack)->alternative_matchingpath = LABEL();
- }
- /* Must be after the matchingpath label. */
- if (offset != 0 && common->optimized_cbracket[offset >> 1] != 0)
- {
- SLJIT_ASSERT(private_data_ptr == OVECTOR(offset + 0));
- OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1), STR_PTR, 0);
- }
- if (ket == OP_KETRMAX)
- {
- if (repeat_type != 0)
- {
- if (has_alternatives)
- BACKTRACK_AS(bracket_backtrack)->alternative_matchingpath = LABEL();
- OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_MEM1(SLJIT_SP), repeat_ptr, SLJIT_MEM1(SLJIT_SP), repeat_ptr, SLJIT_IMM, 1);
- JUMPTO(SLJIT_NOT_ZERO, rmax_label);
- /* Drop STR_PTR for greedy plus quantifier. */
- if (opcode != OP_ONCE)
- free_stack(common, 1);
- }
- else if (opcode == OP_ONCE || opcode >= OP_SBRA)
- {
- if (has_alternatives)
- BACKTRACK_AS(bracket_backtrack)->alternative_matchingpath = LABEL();
- /* Checking zero-length iteration. */
- if (opcode != OP_ONCE)
- {
- CMPTO(SLJIT_NOT_EQUAL, SLJIT_MEM1(SLJIT_SP), private_data_ptr, STR_PTR, 0, rmax_label);
- /* Drop STR_PTR for greedy plus quantifier. */
- if (bra != OP_BRAZERO)
- free_stack(common, 1);
- }
- else
- /* TMP2 must contain the starting STR_PTR. */
- CMPTO(SLJIT_NOT_EQUAL, TMP2, 0, STR_PTR, 0, rmax_label);
- }
- else
- JUMPTO(SLJIT_JUMP, rmax_label);
- BACKTRACK_AS(bracket_backtrack)->recursive_matchingpath = LABEL();
- }
- if (repeat_type == OP_EXACT)
- {
- count_match(common);
- OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_MEM1(SLJIT_SP), repeat_ptr, SLJIT_MEM1(SLJIT_SP), repeat_ptr, SLJIT_IMM, 1);
- JUMPTO(SLJIT_NOT_ZERO, rmax_label);
- }
- else if (repeat_type == OP_UPTO)
- {
- /* We need to preserve the counter. */
- OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), repeat_ptr);
- allocate_stack(common, 1);
- OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP2, 0);
- }
- if (bra == OP_BRAZERO)
- BACKTRACK_AS(bracket_backtrack)->zero_matchingpath = LABEL();
- if (bra == OP_BRAMINZERO)
- {
- /* This is a backtrack path! (From the viewpoint of OP_BRAMINZERO) */
- JUMPTO(SLJIT_JUMP, ((braminzero_backtrack *)parent)->matchingpath);
- if (braminzero != NULL)
- {
- JUMPHERE(braminzero);
- /* We need to release the end pointer to perform the
- backtrack for the zero-length iteration. When
- framesize is < 0, OP_ONCE will do the release itself. */
- if (opcode == OP_ONCE && BACKTRACK_AS(bracket_backtrack)->u.framesize >= 0)
- {
- OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
- add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL));
- OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, (BACKTRACK_AS(bracket_backtrack)->u.framesize - 1) * sizeof(sljit_sw));
- }
- else if (ket == OP_KETRMIN && opcode != OP_ONCE)
- free_stack(common, 1);
- }
- /* Continue to the normal backtrack. */
- }
- if ((ket != OP_KET && bra != OP_BRAMINZERO) || bra == OP_BRAZERO)
- count_match(common);
- /* Skip the other alternatives. */
- while (*cc == OP_ALT)
- cc += GET(cc, 1);
- cc += 1 + LINK_SIZE;
- if (opcode == OP_ONCE)
- {
- /* We temporarily encode the needs_control_head in the lowest bit.
- Note: on the target architectures of SLJIT the ((x << 1) >> 1) returns
- the same value for small signed numbers (including negative numbers). */
- BACKTRACK_AS(bracket_backtrack)->u.framesize = (BACKTRACK_AS(bracket_backtrack)->u.framesize << 1) | (needs_control_head ? 1 : 0);
- }
- return cc + repeat_length;
- }
- static PCRE2_SPTR compile_bracketpos_matchingpath(compiler_common *common, PCRE2_SPTR cc, backtrack_common *parent)
- { /* Generates the matching path for a bracket whose opcode is OP_BRAPOS,
- OP_SBRAPOS, OP_CBRAPOS or OP_SCBRAPOS, optionally preceded by OP_BRAPOSZERO
- (any other opcode reaches SLJIT_UNREACHABLE in the switch below).
- Each alternative up to the closing OP_KETRPOS is compiled in turn. The code
- emitted after a successful alternative jumps back to the 'loop' label, and the
- backtracking path of that alternative is emitted right after the jump.
- common: shared compiler state. cc: points at the bracket opcode (or at the
- OP_BRAPOSZERO prefix). parent: not referenced directly in this body;
- presumably consumed by the PUSH_BACKTRACK macro (defined elsewhere).
- Returns the address following OP_KETRPOS (cc + 1 + LINK_SIZE), or NULL when
- sljit_get_compiler_error() reports a failure. */
- DEFINE_COMPILER;
- backtrack_common *backtrack;
- PCRE2_UCHAR opcode;
- int private_data_ptr;
- int cbraprivptr = 0;
- BOOL needs_control_head;
- int framesize;
- int stacksize;
- int offset = 0;
- BOOL zero = FALSE;
- PCRE2_SPTR ccbegin = NULL;
- int stack; /* Also contains the offset of control head. */
- struct sljit_label *loop = NULL;
- struct jump_list *emptymatch = NULL;
- PUSH_BACKTRACK(sizeof(bracketpos_backtrack), cc, NULL);
- if (*cc == OP_BRAPOSZERO)
- {
- zero = TRUE; /* When zero stays FALSE an extra stack slot is kept: it is written
- as 1 before the loop, 0 after an alternative completes, and the code after
- the loop adds a backtrack jump if it is still non-zero. */
- cc++;
- }
- opcode = *cc;
- private_data_ptr = PRIVATE_DATA(cc);
- SLJIT_ASSERT(private_data_ptr != 0);
- BACKTRACK_AS(bracketpos_backtrack)->private_data_ptr = private_data_ptr;
- switch(opcode)
- {
- case OP_BRAPOS:
- case OP_SBRAPOS:
- ccbegin = cc + 1 + LINK_SIZE;
- break;
- case OP_CBRAPOS:
- case OP_SCBRAPOS:
- offset = GET2(cc, 1 + LINK_SIZE);
- /* This case cannot be optimized in the same way as
- normal capturing brackets. */
- SLJIT_ASSERT(common->optimized_cbracket[offset] == 0);
- cbraprivptr = OVECTOR_PRIV(offset);
- offset <<= 1; /* From here offset indexes the capture's ovector pair: OVECTOR(offset) and OVECTOR(offset + 1). */
- ccbegin = cc + 1 + LINK_SIZE + IMM2_SIZE;
- break;
- default:
- SLJIT_UNREACHABLE();
- break;
- }
- framesize = get_framesize(common, cc, NULL, FALSE, &needs_control_head);
- BACKTRACK_AS(bracketpos_backtrack)->framesize = framesize;
- if (framesize < 0) /* Negative framesize: init_frame() is not called; only the slots counted below are allocated. */
- {
- if (offset != 0)
- {
- stacksize = 2;
- if (common->capture_last_ptr != 0)
- stacksize++;
- }
- else
- stacksize = 1;
- if (needs_control_head)
- stacksize++;
- if (!zero)
- stacksize++;
- BACKTRACK_AS(bracketpos_backtrack)->stacksize = stacksize;
- allocate_stack(common, stacksize);
- if (framesize == no_frame)
- OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, STACK_TOP, 0); /* Remember STACK_TOP; it is reloaded from this slot after each alternative. */
- stack = 0;
- if (offset != 0)
- {
- stack = 2;
- OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset));
- OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1));
- OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP1, 0);
- if (common->capture_last_ptr != 0)
- OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->capture_last_ptr);
- OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), TMP2, 0);
- if (needs_control_head)
- OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr);
- if (common->capture_last_ptr != 0)
- {
- OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(2), TMP1, 0);
- stack = 3;
- }
- }
- else
- {
- if (needs_control_head)
- OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr);
- OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
- stack = 1;
- }
- if (needs_control_head)
- stack++;
- if (!zero)
- OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stack), SLJIT_IMM, 1); /* Flag slot starts at 1; it is cleared to 0 after a completed alternative. */
- if (needs_control_head)
- {
- stack--;
- OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stack), TMP2, 0);
- }
- }
- else
- {
- stacksize = framesize + 1;
- if (!zero)
- stacksize++;
- if (needs_control_head)
- stacksize++;
- if (offset == 0)
- stacksize++;
- BACKTRACK_AS(bracketpos_backtrack)->stacksize = stacksize;
- allocate_stack(common, stacksize);
- OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
- if (needs_control_head)
- OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr);
- OP2(SLJIT_ADD, SLJIT_MEM1(SLJIT_SP), private_data_ptr, STACK_TOP, 0, SLJIT_IMM, stacksize * sizeof(sljit_sw));
- stack = 0;
- if (!zero)
- {
- OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 1); /* Flag slot starts at 1 here as well; on this path it lives at STACK(0). */
- stack = 1;
- }
- if (needs_control_head)
- {
- OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stack), TMP2, 0);
- stack++;
- }
- if (offset == 0)
- {
- OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stack), STR_PTR, 0);
- stack++;
- }
- OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stack), TMP1, 0); /* TMP1 still holds the previous private_data_ptr value loaded above. */
- init_frame(common, cc, NULL, stacksize - 1, stacksize - framesize);
- stack -= 1 + (offset == 0); /* Step stack back to the slot where the control head was stored (used when needs_control_head is set). */
- }
- if (offset != 0)
- OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), cbraprivptr, STR_PTR, 0);
- loop = LABEL(); /* Target of the jump emitted after every successful alternative. */
- while (*cc != OP_KETRPOS)
- {
- backtrack->top = NULL;
- backtrack->topbacktracks = NULL;
- cc += GET(cc, 1); /* cc now marks the end of the code range compiled as the current alternative. */
- compile_matchingpath(common, ccbegin, cc, backtrack);
- if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
- return NULL;
- if (framesize < 0)
- {
- if (framesize == no_frame)
- OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
- if (offset != 0)
- {
- OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), cbraprivptr);
- OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1), STR_PTR, 0);
- OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), cbraprivptr, STR_PTR, 0);
- if (common->capture_last_ptr != 0)
- OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->capture_last_ptr, SLJIT_IMM, offset >> 1);
- OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset), TMP1, 0);
- }
- else
- {
- if (opcode == OP_SBRAPOS)
- OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
- OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
- }
- /* Even if the match is empty, we need to reset the control head. */
- if (needs_control_head)
- OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, SLJIT_MEM1(STACK_TOP), STACK(stack));
- if (opcode == OP_SBRAPOS || opcode == OP_SCBRAPOS)
- add_jump(compiler, &emptymatch, CMP(SLJIT_EQUAL, TMP1, 0, STR_PTR, 0)); /* TMP1 holds the position saved before this iteration: leave the loop if STR_PTR did not move. */
- if (!zero)
- OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize - 1), SLJIT_IMM, 0);
- }
- else
- {
- if (offset != 0)
- {
- OP2(SLJIT_SUB, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr, SLJIT_IMM, stacksize * sizeof(sljit_sw));
- OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), cbraprivptr);
- OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1), STR_PTR, 0);
- OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), cbraprivptr, STR_PTR, 0);
- if (common->capture_last_ptr != 0)
- OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->capture_last_ptr, SLJIT_IMM, offset >> 1);
- OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset), TMP1, 0);
- }
- else
- {
- OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
- OP2(SLJIT_SUB, STACK_TOP, 0, TMP2, 0, SLJIT_IMM, stacksize * sizeof(sljit_sw));
- if (opcode == OP_SBRAPOS)
- OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP2), STACK(-framesize - 2));
- OP1(SLJIT_MOV, SLJIT_MEM1(TMP2), STACK(-framesize - 2), STR_PTR, 0);
- }
- /* Even if the match is empty, we need to reset the control head. */
- if (needs_control_head)
- OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, SLJIT_MEM1(STACK_TOP), STACK(stack));
- if (opcode == OP_SBRAPOS || opcode == OP_SCBRAPOS)
- add_jump(compiler, &emptymatch, CMP(SLJIT_EQUAL, TMP1, 0, STR_PTR, 0));
- if (!zero)
- {
- if (framesize < 0) /* NOTE(review): this sits in the else arm of the outer "framesize < 0" test, so this arm looks unreachable - confirm before removing. */
- OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize - 1), SLJIT_IMM, 0);
- else
- OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
- }
- }
- JUMPTO(SLJIT_JUMP, loop); /* Alternative completed: go round for another iteration. */
- flush_stubs(common);
- compile_backtrackingpath(common, backtrack->top);
- if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
- return NULL;
- set_jumps(backtrack->topbacktracks, LABEL()); /* Failures of this alternative land here; STR_PTR is then reloaded from the saved position. */
- if (framesize < 0)
- {
- if (offset != 0)
- OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP), cbraprivptr);
- else
- OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
- }
- else
- {
- if (offset != 0)
- {
- /* Last alternative. */
- if (*cc == OP_KETRPOS)
- OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
- OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP), cbraprivptr);
- }
- else
- {
- OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
- OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(TMP2), STACK(-framesize - 2));
- }
- }
- if (*cc == OP_KETRPOS)
- break;
- ccbegin = cc + 1 + LINK_SIZE;
- }
- /* We don't have to restore the control head in case of a failed match. */
- backtrack->topbacktracks = NULL;
- if (!zero)
- {
- if (framesize < 0)
- add_jump(compiler, &backtrack->topbacktracks, CMP(SLJIT_NOT_EQUAL, SLJIT_MEM1(STACK_TOP), STACK(stacksize - 1), SLJIT_IMM, 0));
- else /* TMP2 is set to [private_data_ptr] above. */
- add_jump(compiler, &backtrack->topbacktracks, CMP(SLJIT_NOT_EQUAL, SLJIT_MEM1(TMP2), STACK(-stacksize), SLJIT_IMM, 0));
- }
- /* None of them matched. */
- set_jumps(emptymatch, LABEL()); /* The jumps taken when an iteration did not advance STR_PTR also land here. */
- count_match(common);
- return cc + 1 + LINK_SIZE;
- }
- static SLJIT_INLINE PCRE2_SPTR get_iterator_parameters(compiler_common *common, PCRE2_SPTR cc, PCRE2_UCHAR *opcode, PCRE2_UCHAR *type, sljit_u32 *max, sljit_u32 *exact, PCRE2_SPTR *end)
- { /* Decodes the repeated item at cc into a normalized description.
- Outputs:
- *opcode - the repeat kind, re-based onto the OP_STAR .. OP_POSUPTO family
- (or OP_EXACT); the PLUS variants are turned into *exact = 1 plus the
- matching STAR variant.
- *type - what is repeated: OP_CHAR, OP_CHARI, OP_NOT, OP_NOTI, the type
- opcode that follows an OP_TYPE repeat, or the class opcode itself.
- *exact - number of mandatory repetitions (0 when there are none).
- *max - written only for the UPTO kinds and for class ranges, so callers
- must initialise it; for class ranges it ends up as the count beyond *exact.
- *end - address just past the whole repeated item.
- Returns a pointer to the item's data (the character, the type opcode's
- argument, or the class data following the class opcode). */
- int class_len;
- *opcode = *cc;
- *exact = 0;
- if (*opcode >= OP_STAR && *opcode <= OP_POSUPTO)
- {
- cc++;
- *type = OP_CHAR;
- }
- else if (*opcode >= OP_STARI && *opcode <= OP_POSUPTOI)
- {
- cc++;
- *type = OP_CHARI;
- *opcode -= OP_STARI - OP_STAR; /* Re-base onto the OP_STAR family; relies on the opcode groups being laid out in parallel (enum defined elsewhere). */
- }
- else if (*opcode >= OP_NOTSTAR && *opcode <= OP_NOTPOSUPTO)
- {
- cc++;
- *type = OP_NOT;
- *opcode -= OP_NOTSTAR - OP_STAR;
- }
- else if (*opcode >= OP_NOTSTARI && *opcode <= OP_NOTPOSUPTOI)
- {
- cc++;
- *type = OP_NOTI;
- *opcode -= OP_NOTSTARI - OP_STAR;
- }
- else if (*opcode >= OP_TYPESTAR && *opcode <= OP_TYPEPOSUPTO)
- {
- cc++;
- *opcode -= OP_TYPESTAR - OP_STAR;
- *type = OP_END; /* Marker only: the real type is read from the code further down. */
- }
- else
- {
- SLJIT_ASSERT(*opcode == OP_CLASS || *opcode == OP_NCLASS || *opcode == OP_XCLASS);
- *type = *opcode;
- cc++;
- class_len = (*type < OP_XCLASS) ? (int)(1 + (32 / sizeof(PCRE2_UCHAR))) : GET(cc, 0);
- *opcode = cc[class_len - 1]; /* The repeat opcode is the code unit at this index; cc has already been stepped past the class opcode. */
- if (*opcode >= OP_CRSTAR && *opcode <= OP_CRMINQUERY)
- {
- *opcode -= OP_CRSTAR - OP_STAR;
- *end = cc + class_len;
- if (*opcode == OP_PLUS || *opcode == OP_MINPLUS)
- {
- *exact = 1;
- *opcode -= OP_PLUS - OP_STAR;
- }
- }
- else if (*opcode >= OP_CRPOSSTAR && *opcode <= OP_CRPOSQUERY)
- {
- *opcode -= OP_CRPOSSTAR - OP_POSSTAR;
- *end = cc + class_len;
- if (*opcode == OP_POSPLUS)
- {
- *exact = 1;
- *opcode = OP_POSSTAR;
- }
- }
- else
- {
- SLJIT_ASSERT(*opcode == OP_CRRANGE || *opcode == OP_CRMINRANGE || *opcode == OP_CRPOSRANGE);
- *max = GET2(cc, (class_len + IMM2_SIZE));
- *exact = GET2(cc, class_len);
- if (*max == 0) /* A stored maximum of 0 is mapped to the STAR-family opcode. */
- {
- if (*opcode == OP_CRPOSRANGE)
- *opcode = OP_POSSTAR;
- else
- *opcode -= OP_CRRANGE - OP_STAR;
- }
- else
- {
- *max -= *exact; /* From here *max counts the repetitions allowed beyond the mandatory *exact ones. */
- if (*max == 0)
- *opcode = OP_EXACT;
- else if (*max == 1)
- {
- if (*opcode == OP_CRPOSRANGE)
- *opcode = OP_POSQUERY;
- else
- *opcode -= OP_CRRANGE - OP_QUERY;
- }
- else
- {
- if (*opcode == OP_CRPOSRANGE)
- *opcode = OP_POSUPTO;
- else
- *opcode -= OP_CRRANGE - OP_UPTO;
- }
- }
- *end = cc + class_len + 2 * IMM2_SIZE;
- }
- return cc; /* Class items are fully decoded here; the code below handles the non-class kinds. */
- }
- switch(*opcode)
- {
- case OP_EXACT:
- *exact = GET2(cc, 0);
- cc += IMM2_SIZE;
- break;
- case OP_PLUS:
- case OP_MINPLUS:
- *exact = 1;
- *opcode -= OP_PLUS - OP_STAR;
- break;
- case OP_POSPLUS:
- *exact = 1;
- *opcode = OP_POSSTAR;
- break;
- case OP_UPTO:
- case OP_MINUPTO:
- case OP_POSUPTO:
- *max = GET2(cc, 0);
- cc += IMM2_SIZE;
- break;
- }
- if (*type == OP_END) /* OP_END was stored above as the marker for the OP_TYPE repeats. */
- {
- *type = *cc;
- *end = next_opcode(common, cc);
- cc++;
- return cc;
- }
- *end = cc + 1;
- #ifdef SUPPORT_UNICODE
- if (common->utf && HAS_EXTRALEN(*cc)) *end += GET_EXTRALEN(*cc);
- #endif
- return cc;
- }
- static PCRE2_SPTR compile_iterator_matchingpath(compiler_common *common, PCRE2_SPTR cc, backtrack_common *parent)
- { /* Generates the matching path for a repeated single item (character, type
- or class). The repeat is decoded by get_iterator_parameters(); the mandatory
- 'exact' repetitions are emitted first, followed by the code selected by the
- normalized opcode in the switch below.
- common: shared compiler state. cc: points at the repeat opcode. parent: not
- referenced directly in this body; presumably consumed by the PUSH_BACKTRACK
- macro (defined elsewhere).
- Returns 'end' as set by get_iterator_parameters(), moved two code units
- further when the charpos path consumes a following OP_CHAR / OP_CHARI.
- This function has no NULL return; compiler errors are not checked here. */
- DEFINE_COMPILER;
- backtrack_common *backtrack;
- PCRE2_UCHAR opcode;
- PCRE2_UCHAR type;
- sljit_u32 max = 0, exact;
- BOOL fast_fail;
- sljit_s32 fast_str_ptr;
- BOOL charpos_enabled;
- PCRE2_UCHAR charpos_char;
- unsigned int charpos_othercasebit;
- PCRE2_SPTR end;
- jump_list *no_match = NULL;
- jump_list *no_char1_match = NULL;
- struct sljit_jump *jump = NULL;
- struct sljit_label *label;
- int private_data_ptr = PRIVATE_DATA(cc);
- int base = (private_data_ptr == 0) ? SLJIT_MEM1(STACK_TOP) : SLJIT_MEM1(SLJIT_SP); /* Iterator state is kept in STACK(0)/STACK(1) when no private data
- is assigned, otherwise in two consecutive words at private_data_ptr addressed from SLJIT_SP. */
- int offset0 = (private_data_ptr == 0) ? STACK(0) : private_data_ptr;
- int offset1 = (private_data_ptr == 0) ? STACK(1) : private_data_ptr + (int)sizeof(sljit_sw);
- int tmp_base, tmp_offset;
- PUSH_BACKTRACK(sizeof(char_iterator_backtrack), cc, NULL);
- fast_str_ptr = PRIVATE_DATA(cc + 1);
- fast_fail = TRUE;
- SLJIT_ASSERT(common->fast_forward_bc_ptr == NULL || fast_str_ptr == 0 || cc == common->fast_forward_bc_ptr);
- if (cc == common->fast_forward_bc_ptr)
- fast_fail = FALSE;
- else if (common->fast_fail_start_ptr == 0)
- fast_str_ptr = 0;
- SLJIT_ASSERT(common->fast_forward_bc_ptr != NULL || fast_str_ptr == 0
- || (fast_str_ptr >= common->fast_fail_start_ptr && fast_str_ptr <= common->fast_fail_end_ptr));
- cc = get_iterator_parameters(common, cc, &opcode, &type, &max, &exact, &end);
- if (type != OP_EXTUNI) /* The scratch location is TMP3, except for OP_EXTUNI where the POSSESSIVE0 slot is used
- (presumably because matching OP_EXTUNI clobbers TMP3 - TODO confirm). */
- {
- tmp_base = TMP3;
- tmp_offset = 0;
- }
- else
- {
- tmp_base = SLJIT_MEM1(SLJIT_SP);
- tmp_offset = POSSESSIVE0;
- }
- if (fast_fail && fast_str_ptr != 0)
- add_jump(compiler, &backtrack->topbacktracks, CMP(SLJIT_LESS_EQUAL, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP), fast_str_ptr)); /* Backtrack at once when STR_PTR is not beyond the position stored in the fast_str_ptr slot. */
- /* Handle fixed part first. */
- if (exact > 1) /* Counted loop for the mandatory repetitions. In PCRE2_JIT_COMPLETE mode without UTF the needed
- length is checked once against STR_END and FALSE is passed as the last argument of compile_char1_matchingpath;
- otherwise TRUE is passed (presumably enabling a per-character end check - TODO confirm). */
- {
- SLJIT_ASSERT(fast_str_ptr == 0);
- if (common->mode == PCRE2_JIT_COMPLETE
- #ifdef SUPPORT_UNICODE
- && !common->utf
- #endif
- )
- {
- OP2(SLJIT_ADD, TMP1, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(exact));
- add_jump(compiler, &backtrack->topbacktracks, CMP(SLJIT_GREATER, TMP1, 0, STR_END, 0));
- OP1(SLJIT_MOV, tmp_base, tmp_offset, SLJIT_IMM, exact);
- label = LABEL();
- compile_char1_matchingpath(common, type, cc, &backtrack->topbacktracks, FALSE);
- OP2(SLJIT_SUB | SLJIT_SET_Z, tmp_base, tmp_offset, tmp_base, tmp_offset, SLJIT_IMM, 1);
- JUMPTO(SLJIT_NOT_ZERO, label);
- }
- else
- {
- OP1(SLJIT_MOV, tmp_base, tmp_offset, SLJIT_IMM, exact);
- label = LABEL();
- compile_char1_matchingpath(common, type, cc, &backtrack->topbacktracks, TRUE);
- OP2(SLJIT_SUB | SLJIT_SET_Z, tmp_base, tmp_offset, tmp_base, tmp_offset, SLJIT_IMM, 1);
- JUMPTO(SLJIT_NOT_ZERO, label);
- }
- }
- else if (exact == 1)
- compile_char1_matchingpath(common, type, cc, &backtrack->topbacktracks, TRUE);
- switch(opcode)
- {
- case OP_STAR:
- case OP_UPTO:
- SLJIT_ASSERT(fast_str_ptr == 0 || opcode == OP_STAR);
- if (type == OP_ANYNL || type == OP_EXTUNI) /* For these two types every reached STR_PTR is pushed on the stack (one allocate_stack(1) per iteration). */
- {
- SLJIT_ASSERT(private_data_ptr == 0);
- SLJIT_ASSERT(fast_str_ptr == 0);
- allocate_stack(common, 2);
- OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
- OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_IMM, 0);
- if (opcode == OP_UPTO)
- OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), POSSESSIVE0, SLJIT_IMM, max);
- label = LABEL();
- compile_char1_matchingpath(common, type, cc, &BACKTRACK_AS(char_iterator_backtrack)->u.backtracks, TRUE);
- if (opcode == OP_UPTO)
- {
- OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), POSSESSIVE0);
- OP2(SLJIT_SUB | SLJIT_SET_Z, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
- jump = JUMP(SLJIT_ZERO);
- OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), POSSESSIVE0, TMP1, 0);
- }
- /* We cannot use TMP3 because of this allocate_stack. */
- allocate_stack(common, 1);
- OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
- JUMPTO(SLJIT_JUMP, label);
- if (jump != NULL)
- JUMPHERE(jump);
- }
- else
- {
- charpos_enabled = FALSE;
- charpos_char = 0;
- charpos_othercasebit = 0;
- if ((type != OP_CHAR && type != OP_CHARI) && (*end == OP_CHAR || *end == OP_CHARI)) /* The repeated item is not a literal character but is
- followed by one: the loops below look for occurrences of that literal while repeating. */
- {
- charpos_enabled = TRUE;
- #ifdef SUPPORT_UNICODE
- charpos_enabled = !common->utf || !HAS_EXTRALEN(end[1]);
- #endif
- if (charpos_enabled && *end == OP_CHARI && char_has_othercase(common, end + 1))
- {
- charpos_othercasebit = char_get_othercase_bit(common, end + 1);
- if (charpos_othercasebit == 0)
- charpos_enabled = FALSE;
- }
- if (charpos_enabled)
- {
- charpos_char = end[1];
- /* Consume the OP_CHAR opcode. */
- end += 2;
- #if PCRE2_CODE_UNIT_WIDTH == 8
- SLJIT_ASSERT((charpos_othercasebit >> 8) == 0);
- #elif PCRE2_CODE_UNIT_WIDTH == 16 || PCRE2_CODE_UNIT_WIDTH == 32
- SLJIT_ASSERT((charpos_othercasebit >> 9) == 0);
- if ((charpos_othercasebit & 0x100) != 0)
- charpos_othercasebit = (charpos_othercasebit & 0xff) << 8;
- #endif
- if (charpos_othercasebit != 0)
- charpos_char |= charpos_othercasebit;
- BACKTRACK_AS(char_iterator_backtrack)->u.charpos.enabled = TRUE;
- BACKTRACK_AS(char_iterator_backtrack)->u.charpos.chr = charpos_char;
- BACKTRACK_AS(char_iterator_backtrack)->u.charpos.othercasebit = charpos_othercasebit;
- }
- }
- if (charpos_enabled)
- {
- if (opcode == OP_UPTO)
- OP1(SLJIT_MOV, tmp_base, tmp_offset, SLJIT_IMM, max + 1);
- /* Search the first instance of charpos_char. */
- jump = JUMP(SLJIT_JUMP);
- label = LABEL();
- if (opcode == OP_UPTO)
- {
- OP2(SLJIT_SUB | SLJIT_SET_Z, tmp_base, tmp_offset, tmp_base, tmp_offset, SLJIT_IMM, 1);
- add_jump(compiler, &backtrack->topbacktracks, JUMP(SLJIT_ZERO));
- }
- compile_char1_matchingpath(common, type, cc, &backtrack->topbacktracks, FALSE);
- if (fast_str_ptr != 0)
- OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), fast_str_ptr, STR_PTR, 0);
- JUMPHERE(jump);
- detect_partial_match(common, &backtrack->topbacktracks);
- OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
- if (charpos_othercasebit != 0)
- OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, charpos_othercasebit);
- CMPTO(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, charpos_char, label);
- if (private_data_ptr == 0)
- allocate_stack(common, 2);
- OP1(SLJIT_MOV, base, offset0, STR_PTR, 0);
- OP1(SLJIT_MOV, base, offset1, STR_PTR, 0);
- if (opcode == OP_UPTO)
- {
- OP2(SLJIT_SUB | SLJIT_SET_Z, tmp_base, tmp_offset, tmp_base, tmp_offset, SLJIT_IMM, 1);
- add_jump(compiler, &no_match, JUMP(SLJIT_ZERO));
- }
- /* Search the last instance of charpos_char. */
- label = LABEL();
- compile_char1_matchingpath(common, type, cc, &no_match, FALSE);
- if (fast_str_ptr != 0)
- OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), fast_str_ptr, STR_PTR, 0);
- detect_partial_match(common, &no_match);
- OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
- if (charpos_othercasebit != 0)
- OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, charpos_othercasebit);
- if (opcode == OP_STAR)
- {
- CMPTO(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, charpos_char, label);
- OP1(SLJIT_MOV, base, offset0, STR_PTR, 0);
- }
- else
- {
- jump = CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, charpos_char);
- OP1(SLJIT_MOV, base, offset0, STR_PTR, 0);
- JUMPHERE(jump);
- }
- if (opcode == OP_UPTO)
- {
- OP2(SLJIT_SUB | SLJIT_SET_Z, tmp_base, tmp_offset, tmp_base, tmp_offset, SLJIT_IMM, 1);
- JUMPTO(SLJIT_NOT_ZERO, label);
- }
- else
- JUMPTO(SLJIT_JUMP, label);
- set_jumps(no_match, LABEL());
- OP1(SLJIT_MOV, STR_PTR, 0, base, offset0); /* Resume from the last recorded occurrence and step one code unit past it. */
- OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
- OP1(SLJIT_MOV, base, offset0, STR_PTR, 0);
- }
- #if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
- else if (common->utf)
- {
- if (private_data_ptr == 0)
- allocate_stack(common, 2);
- OP1(SLJIT_MOV, base, offset0, STR_PTR, 0);
- OP1(SLJIT_MOV, base, offset1, STR_PTR, 0);
- if (opcode == OP_UPTO)
- OP1(SLJIT_MOV, tmp_base, tmp_offset, SLJIT_IMM, max);
- label = LABEL();
- compile_char1_matchingpath(common, type, cc, &no_match, TRUE);
- OP1(SLJIT_MOV, base, offset0, STR_PTR, 0);
- if (opcode == OP_UPTO)
- {
- OP2(SLJIT_SUB | SLJIT_SET_Z, tmp_base, tmp_offset, tmp_base, tmp_offset, SLJIT_IMM, 1);
- JUMPTO(SLJIT_NOT_ZERO, label);
- }
- else
- JUMPTO(SLJIT_JUMP, label);
- set_jumps(no_match, LABEL());
- OP1(SLJIT_MOV, STR_PTR, 0, base, offset0);
- if (fast_str_ptr != 0)
- OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), fast_str_ptr, STR_PTR, 0);
- }
- #endif
- else
- {
- if (private_data_ptr == 0)
- allocate_stack(common, 2);
- OP1(SLJIT_MOV, base, offset1, STR_PTR, 0);
- if (opcode == OP_UPTO)
- OP1(SLJIT_MOV, tmp_base, tmp_offset, SLJIT_IMM, max);
- label = LABEL();
- detect_partial_match(common, &no_match);
- compile_char1_matchingpath(common, type, cc, &no_char1_match, FALSE);
- if (opcode == OP_UPTO)
- {
- OP2(SLJIT_SUB | SLJIT_SET_Z, tmp_base, tmp_offset, tmp_base, tmp_offset, SLJIT_IMM, 1);
- JUMPTO(SLJIT_NOT_ZERO, label);
- OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1)); /* Counter exhausted: pre-add one unit so the subtraction below, shared with the mismatch exit, cancels out. */
- }
- else
- JUMPTO(SLJIT_JUMP, label);
- set_jumps(no_char1_match, LABEL());
- OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
- set_jumps(no_match, LABEL());
- OP1(SLJIT_MOV, base, offset0, STR_PTR, 0);
- if (fast_str_ptr != 0)
- OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), fast_str_ptr, STR_PTR, 0);
- }
- }
- BACKTRACK_AS(char_iterator_backtrack)->matchingpath = LABEL(); /* Label recorded in the backtrack record; its use is outside this function. */
- break;
- case OP_MINSTAR:
- if (private_data_ptr == 0)
- allocate_stack(common, 1);
- OP1(SLJIT_MOV, base, offset0, STR_PTR, 0);
- BACKTRACK_AS(char_iterator_backtrack)->matchingpath = LABEL();
- if (fast_str_ptr != 0)
- OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), fast_str_ptr, STR_PTR, 0);
- break;
- case OP_MINUPTO:
- SLJIT_ASSERT(fast_str_ptr == 0);
- if (private_data_ptr == 0)
- allocate_stack(common, 2);
- OP1(SLJIT_MOV, base, offset0, STR_PTR, 0);
- OP1(SLJIT_MOV, base, offset1, SLJIT_IMM, max + 1);
- BACKTRACK_AS(char_iterator_backtrack)->matchingpath = LABEL();
- break;
- case OP_QUERY:
- case OP_MINQUERY:
- SLJIT_ASSERT(fast_str_ptr == 0);
- if (private_data_ptr == 0)
- allocate_stack(common, 1);
- OP1(SLJIT_MOV, base, offset0, STR_PTR, 0);
- if (opcode == OP_QUERY)
- compile_char1_matchingpath(common, type, cc, &BACKTRACK_AS(char_iterator_backtrack)->u.backtracks, TRUE);
- BACKTRACK_AS(char_iterator_backtrack)->matchingpath = LABEL();
- break;
- case OP_EXACT: /* Nothing more to emit: the mandatory part above is the whole repeat. */
- break;
- case OP_POSSTAR:
- #if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
- if (common->utf)
- {
- OP1(SLJIT_MOV, tmp_base, tmp_offset, STR_PTR, 0);
- label = LABEL();
- compile_char1_matchingpath(common, type, cc, &no_match, TRUE);
- OP1(SLJIT_MOV, tmp_base, tmp_offset, STR_PTR, 0);
- JUMPTO(SLJIT_JUMP, label);
- set_jumps(no_match, LABEL());
- OP1(SLJIT_MOV, STR_PTR, 0, tmp_base, tmp_offset);
- if (fast_str_ptr != 0)
- OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), fast_str_ptr, STR_PTR, 0);
- break;
- }
- #endif
- label = LABEL();
- detect_partial_match(common, &no_match);
- compile_char1_matchingpath(common, type, cc, &no_char1_match, FALSE);
- JUMPTO(SLJIT_JUMP, label);
- set_jumps(no_char1_match, LABEL());
- OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
- set_jumps(no_match, LABEL());
- if (fast_str_ptr != 0)
- OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), fast_str_ptr, STR_PTR, 0);
- break;
- case OP_POSUPTO:
- SLJIT_ASSERT(fast_str_ptr == 0);
- #if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
- if (common->utf)
- {
- OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), POSSESSIVE1, STR_PTR, 0);
- OP1(SLJIT_MOV, tmp_base, tmp_offset, SLJIT_IMM, max);
- label = LABEL();
- compile_char1_matchingpath(common, type, cc, &no_match, TRUE);
- OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), POSSESSIVE1, STR_PTR, 0);
- OP2(SLJIT_SUB | SLJIT_SET_Z, tmp_base, tmp_offset, tmp_base, tmp_offset, SLJIT_IMM, 1);
- JUMPTO(SLJIT_NOT_ZERO, label);
- set_jumps(no_match, LABEL());
- OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP), POSSESSIVE1);
- break;
- }
- #endif
- OP1(SLJIT_MOV, tmp_base, tmp_offset, SLJIT_IMM, max);
- label = LABEL();
- detect_partial_match(common, &no_match);
- compile_char1_matchingpath(common, type, cc, &no_char1_match, FALSE);
- OP2(SLJIT_SUB | SLJIT_SET_Z, tmp_base, tmp_offset, tmp_base, tmp_offset, SLJIT_IMM, 1);
- JUMPTO(SLJIT_NOT_ZERO, label);
- OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
- set_jumps(no_char1_match, LABEL());
- OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
- set_jumps(no_match, LABEL());
- break;
- case OP_POSQUERY:
- SLJIT_ASSERT(fast_str_ptr == 0);
- OP1(SLJIT_MOV, tmp_base, tmp_offset, STR_PTR, 0);
- compile_char1_matchingpath(common, type, cc, &no_match, TRUE);
- OP1(SLJIT_MOV, tmp_base, tmp_offset, STR_PTR, 0);
- set_jumps(no_match, LABEL());
- OP1(SLJIT_MOV, STR_PTR, 0, tmp_base, tmp_offset);
- break;
- default:
- SLJIT_UNREACHABLE();
- break;
- }
- count_match(common);
- return end;
- }
- static SLJIT_INLINE PCRE2_SPTR compile_fail_accept_matchingpath(compiler_common *common, PCRE2_SPTR cc, backtrack_common *parent)
- { /* Generates code for the opcode at cc. Only OP_FAIL, OP_ACCEPT and
- OP_ASSERT_ACCEPT are tested for explicitly; which opcodes are dispatched
- here is decided by the caller (not visible in this function).
- OP_FAIL: an unconditional jump is added to the backtrack list.
- Otherwise a jump to the accept label (or the pending accept list when no
- label exists yet) is emitted, guarded by the run-time PCRE2_NOTEMPTY and
- PCRE2_NOTEMPTY_ATSTART options when common->might_be_empty is set.
- Returns cc + 1 on every path. */
- DEFINE_COMPILER;
- backtrack_common *backtrack;
- PUSH_BACKTRACK(sizeof(backtrack_common), cc, NULL);
- if (*cc == OP_FAIL)
- {
- add_jump(compiler, &backtrack->topbacktracks, JUMP(SLJIT_JUMP));
- return cc + 1;
- }
- if (*cc == OP_ACCEPT && common->currententry == NULL && (common->re->overall_options & PCRE2_ENDANCHORED) != 0)
- add_jump(compiler, &common->reset_match, CMP(SLJIT_NOT_EQUAL, STR_PTR, 0, STR_END, 0)); /* With PCRE2_ENDANCHORED set, jump to reset_match unless STR_PTR equals STR_END. */
- if (*cc == OP_ASSERT_ACCEPT || common->currententry != NULL || !common->might_be_empty)
- {
- /* No need to check notempty conditions. */
- if (common->accept_label == NULL)
- add_jump(compiler, &common->accept, JUMP(SLJIT_JUMP));
- else
- JUMPTO(SLJIT_JUMP, common->accept_label);
- return cc + 1;
- }
- if (common->accept_label == NULL) /* Accept when STR_PTR differs from OVECTOR(0) (presumably the match start, i.e. a non-empty match - TODO confirm). */
- add_jump(compiler, &common->accept, CMP(SLJIT_NOT_EQUAL, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(0)));
- else
- CMPTO(SLJIT_NOT_EQUAL, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(0), common->accept_label);
- OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
- OP1(SLJIT_MOV_U32, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, options));
- OP2(SLJIT_AND | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP2, 0, SLJIT_IMM, PCRE2_NOTEMPTY);
- add_jump(compiler, &backtrack->topbacktracks, JUMP(SLJIT_NOT_ZERO)); /* PCRE2_NOTEMPTY is set: backtrack. */
- OP2(SLJIT_AND | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP2, 0, SLJIT_IMM, PCRE2_NOTEMPTY_ATSTART);
- if (common->accept_label == NULL) /* PCRE2_NOTEMPTY_ATSTART is clear: accept. */
- add_jump(compiler, &common->accept, JUMP(SLJIT_ZERO));
- else
- JUMPTO(SLJIT_ZERO, common->accept_label);
- OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, str));
- if (common->accept_label == NULL) /* Accept when STR_PTR differs from the 'str' field of jit_arguments; otherwise fall through to the backtrack jump. */
- add_jump(compiler, &common->accept, CMP(SLJIT_NOT_EQUAL, TMP2, 0, STR_PTR, 0));
- else
- CMPTO(SLJIT_NOT_EQUAL, TMP2, 0, STR_PTR, 0, common->accept_label);
- add_jump(compiler, &backtrack->topbacktracks, JUMP(SLJIT_JUMP));
- return cc + 1;
- }
- /* Emits the matching path for OP_CLOSE. The bracket index is the operand
-    read with GET2(cc, 1). No code is emitted while common->currententry is
-    set. Otherwise STR_PTR is stored at OVECTOR(2 * index + 1) and, unless the
-    bracket is flagged in common->optimized_cbracket, the value held at
-    OVECTOR_PRIV(index) is copied to OVECTOR(2 * index).
-    Returns cc + 1 + IMM2_SIZE.
- */ static SLJIT_INLINE PCRE2_SPTR compile_close_matchingpath(compiler_common *common, PCRE2_SPTR cc)
- {
- DEFINE_COMPILER;
- int offset = GET2(cc, 1);
- BOOL optimized_cbracket = common->optimized_cbracket[offset] != 0;
- /* Data will be discarded anyway... */
- if (common->currententry != NULL)
- return cc + 1 + IMM2_SIZE;
- if (!optimized_cbracket)
- OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR_PRIV(offset)); /* TMP1 = value from the private slot of this bracket. */
- offset <<= 1; /* From here on offset indexes the pair of OVECTOR slots. */
- OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1), STR_PTR, 0);
- if (!optimized_cbracket)
- OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset), TMP1, 0);
- return cc + 1 + IMM2_SIZE;
- }
- /* Emits the matching path for the control verbs that compile_matchingpath
-    dispatches here: OP_PRUNE, OP_PRUNE_ARG, OP_SKIP, OP_SKIP_ARG, OP_THEN,
-    OP_THEN_ARG and OP_COMMIT. A backtrack_common record is pushed for every
-    verb.
-    OP_SKIP: one stack word is allocated and STR_PTR is saved in it.
-    OP_PRUNE_ARG and OP_THEN_ARG: the address cc + 2 is stored both in the
-    SLJIT_SP-relative slot at common->mark_ptr and in jit_arguments.mark_ptr.
-    No further matching-path code is emitted here for the other verbs.
-    Returns the address after the opcode; for the _ARG forms this also skips
-    2 + cc[1] code units.
-    NOTE(review): PUSH_BACKTRACK is defined outside this chunk and presumably
-    returns NULL from this function on failure - confirm.
- */ static SLJIT_INLINE PCRE2_SPTR compile_control_verb_matchingpath(compiler_common *common, PCRE2_SPTR cc, backtrack_common *parent)
- {
- DEFINE_COMPILER;
- backtrack_common *backtrack;
- PCRE2_UCHAR opcode = *cc;
- PCRE2_SPTR ccend = cc + 1;
- if (opcode == OP_PRUNE_ARG || opcode == OP_SKIP_ARG || opcode == OP_THEN_ARG)
- ccend += 2 + cc[1]; /* Skip the argument; cc[1] is used as its length. */
- PUSH_BACKTRACK(sizeof(backtrack_common), cc, NULL);
- if (opcode == OP_SKIP)
- {
- allocate_stack(common, 1);
- OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0); /* Remember the current subject position. */
- return ccend;
- }
- if (opcode == OP_PRUNE_ARG || opcode == OP_THEN_ARG)
- {
- OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
- OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, (sljit_sw)(cc + 2)); /* TMP2 = address of the verb argument inside the compiled pattern. */
- OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->mark_ptr, TMP2, 0);
- OP1(SLJIT_MOV, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, mark_ptr), TMP2, 0);
- }
- return ccend;
- }
- static PCRE2_UCHAR then_trap_opcode[1] = { OP_THEN_TRAP }; /* One-element opcode buffer; then_trap backtrack records point their common.cc at it. */
- /* Pushes a then_trap_backtrack record for the code range cc..ccend and emits
-    the code that builds the matching entry on the stack addressed through
-    STACK_TOP. The record becomes common->then_trap; it stores the offset of
-    cc from common->start and the frame size reported by get_framesize().
-    Emitted stack entry (3 words, plus the frame when framesize >= 0):
-      STACK(size - 1): start offset of the range
-      STACK(size - 2): type_then_trap
-      STACK(size - 3): previous content of the control head slot
-    The control head slot is then set to STACK_TOP plus (size - 3) words.
-    NOTE(review): PUSH_BACKTRACK_NOVALUE is defined outside this chunk and
-    presumably returns from this function on allocation failure - confirm.
- */ static SLJIT_INLINE void compile_then_trap_matchingpath(compiler_common *common, PCRE2_SPTR cc, PCRE2_SPTR ccend, backtrack_common *parent)
- {
- DEFINE_COMPILER;
- backtrack_common *backtrack;
- BOOL needs_control_head;
- int size;
- PUSH_BACKTRACK_NOVALUE(sizeof(then_trap_backtrack), cc);
- common->then_trap = BACKTRACK_AS(then_trap_backtrack);
- BACKTRACK_AS(then_trap_backtrack)->common.cc = then_trap_opcode;
- BACKTRACK_AS(then_trap_backtrack)->start = (sljit_sw)(cc - common->start);
- BACKTRACK_AS(then_trap_backtrack)->framesize = get_framesize(common, cc, ccend, FALSE, &needs_control_head);
- size = BACKTRACK_AS(then_trap_backtrack)->framesize;
- size = 3 + (size < 0 ? 0 : size); /* Three bookkeeping words plus the frame, if there is one. */
- OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr); /* TMP2 keeps the previous control head. */
- allocate_stack(common, size);
- if (size > 3)
- OP2(SLJIT_ADD, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, STACK_TOP, 0, SLJIT_IMM, (size - 3) * sizeof(sljit_sw));
- else
- OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, STACK_TOP, 0);
- OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(size - 1), SLJIT_IMM, BACKTRACK_AS(then_trap_backtrack)->start);
- OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(size - 2), SLJIT_IMM, type_then_trap);
- OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(size - 3), TMP2, 0);
- size = BACKTRACK_AS(then_trap_backtrack)->framesize; /* Back to the raw frame size, which may be negative. */
- if (size >= 0)
- init_frame(common, cc, ccend, size - 1, 0);
- }
- /* Emits the matching path for the opcode sequence cc..ccend. Each opcode is
-    dispatched to its compile_*_matchingpath helper, or handled inline for
-    OP_SET_SOM, OP_BRAMINZERO, OP_MARK and OP_SKIPZERO. Helpers return the
-    address of the next opcode; a NULL return ends this function at once.
-    Helpers that take a jump list receive parent->top->nextbacktracks when
-    parent->top is set, otherwise parent->topbacktracks.
-    When common->has_then is set and common->then_offsets is nonzero at the
-    position of cc, the sequence is wrapped in a then trap: one record is
-    pushed before the loop, a second one after it, and common->then_trap is
-    restored to its previous value.
-    ccend must point at OP_END or at an opcode in OP_ALT..OP_KETRPOS (asserted).
-    NOTE(review): PUSH_BACKTRACK_NOVALUE and BACKTRACK_AS are defined outside
-    this chunk; they presumably use the locals backtrack and parent - confirm.
- */ static void compile_matchingpath(compiler_common *common, PCRE2_SPTR cc, PCRE2_SPTR ccend, backtrack_common *parent)
- {
- DEFINE_COMPILER;
- backtrack_common *backtrack;
- BOOL has_then_trap = FALSE;
- then_trap_backtrack *save_then_trap = NULL;
- SLJIT_ASSERT(*ccend == OP_END || (*ccend >= OP_ALT && *ccend <= OP_KETRPOS));
- if (common->has_then && common->then_offsets[cc - common->start] != 0)
- {
- SLJIT_ASSERT(*ccend != OP_END && common->control_head_ptr != 0);
- has_then_trap = TRUE;
- save_then_trap = common->then_trap;
- /* Tail item on backtrack. */
- compile_then_trap_matchingpath(common, cc, ccend, parent);
- }
- while (cc < ccend)
- {
- switch(*cc)
- {
- case OP_SOD:
- case OP_SOM:
- case OP_NOT_WORD_BOUNDARY:
- case OP_WORD_BOUNDARY:
- case OP_EODN:
- case OP_EOD:
- case OP_DOLL:
- case OP_DOLLM:
- case OP_CIRC:
- case OP_CIRCM:
- case OP_REVERSE:
- cc = compile_simple_assertion_matchingpath(common, *cc, cc + 1, parent->top != NULL ? &parent->top->nextbacktracks : &parent->topbacktracks);
- break;
- case OP_NOT_DIGIT:
- case OP_DIGIT:
- case OP_NOT_WHITESPACE:
- case OP_WHITESPACE:
- case OP_NOT_WORDCHAR:
- case OP_WORDCHAR:
- case OP_ANY:
- case OP_ALLANY:
- case OP_ANYBYTE:
- case OP_NOTPROP:
- case OP_PROP:
- case OP_ANYNL:
- case OP_NOT_HSPACE:
- case OP_HSPACE:
- case OP_NOT_VSPACE:
- case OP_VSPACE:
- case OP_EXTUNI:
- case OP_NOT:
- case OP_NOTI:
- cc = compile_char1_matchingpath(common, *cc, cc + 1, parent->top != NULL ? &parent->top->nextbacktracks : &parent->topbacktracks, TRUE);
- break;
- case OP_SET_SOM: /* Save the old OVECTOR(0) value on the stack and replace it with STR_PTR. */
- PUSH_BACKTRACK_NOVALUE(sizeof(backtrack_common), cc);
- OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(0));
- allocate_stack(common, 1);
- OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(0), STR_PTR, 0);
- OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP2, 0);
- cc++;
- break;
- case OP_CHAR:
- case OP_CHARI:
- if (common->mode == PCRE2_JIT_COMPLETE) /* The multi-character helper is used only in PCRE2_JIT_COMPLETE mode. */
- cc = compile_charn_matchingpath(common, cc, ccend, parent->top != NULL ? &parent->top->nextbacktracks : &parent->topbacktracks);
- else
- cc = compile_char1_matchingpath(common, *cc, cc + 1, parent->top != NULL ? &parent->top->nextbacktracks : &parent->topbacktracks, TRUE);
- break;
- case OP_STAR:
- case OP_MINSTAR:
- case OP_PLUS:
- case OP_MINPLUS:
- case OP_QUERY:
- case OP_MINQUERY:
- case OP_UPTO:
- case OP_MINUPTO:
- case OP_EXACT:
- case OP_POSSTAR:
- case OP_POSPLUS:
- case OP_POSQUERY:
- case OP_POSUPTO:
- case OP_STARI:
- case OP_MINSTARI:
- case OP_PLUSI:
- case OP_MINPLUSI:
- case OP_QUERYI:
- case OP_MINQUERYI:
- case OP_UPTOI:
- case OP_MINUPTOI:
- case OP_EXACTI:
- case OP_POSSTARI:
- case OP_POSPLUSI:
- case OP_POSQUERYI:
- case OP_POSUPTOI:
- case OP_NOTSTAR:
- case OP_NOTMINSTAR:
- case OP_NOTPLUS:
- case OP_NOTMINPLUS:
- case OP_NOTQUERY:
- case OP_NOTMINQUERY:
- case OP_NOTUPTO:
- case OP_NOTMINUPTO:
- case OP_NOTEXACT:
- case OP_NOTPOSSTAR:
- case OP_NOTPOSPLUS:
- case OP_NOTPOSQUERY:
- case OP_NOTPOSUPTO:
- case OP_NOTSTARI:
- case OP_NOTMINSTARI:
- case OP_NOTPLUSI:
- case OP_NOTMINPLUSI:
- case OP_NOTQUERYI:
- case OP_NOTMINQUERYI:
- case OP_NOTUPTOI:
- case OP_NOTMINUPTOI:
- case OP_NOTEXACTI:
- case OP_NOTPOSSTARI:
- case OP_NOTPOSPLUSI:
- case OP_NOTPOSQUERYI:
- case OP_NOTPOSUPTOI:
- case OP_TYPESTAR:
- case OP_TYPEMINSTAR:
- case OP_TYPEPLUS:
- case OP_TYPEMINPLUS:
- case OP_TYPEQUERY:
- case OP_TYPEMINQUERY:
- case OP_TYPEUPTO:
- case OP_TYPEMINUPTO:
- case OP_TYPEEXACT:
- case OP_TYPEPOSSTAR:
- case OP_TYPEPOSPLUS:
- case OP_TYPEPOSQUERY:
- case OP_TYPEPOSUPTO:
- cc = compile_iterator_matchingpath(common, cc, parent);
- break;
- case OP_CLASS:
- case OP_NCLASS:
- if (cc[1 + (32 / sizeof(PCRE2_UCHAR))] >= OP_CRSTAR && cc[1 + (32 / sizeof(PCRE2_UCHAR))] <= OP_CRPOSRANGE) /* A repeat opcode after the 32 bytes of class data selects the iterator path. */
- cc = compile_iterator_matchingpath(common, cc, parent);
- else
- cc = compile_char1_matchingpath(common, *cc, cc + 1, parent->top != NULL ? &parent->top->nextbacktracks : &parent->topbacktracks, TRUE);
- break;
- #if defined SUPPORT_UNICODE || PCRE2_CODE_UNIT_WIDTH == 16 || PCRE2_CODE_UNIT_WIDTH == 32
- case OP_XCLASS:
- if (*(cc + GET(cc, 1)) >= OP_CRSTAR && *(cc + GET(cc, 1)) <= OP_CRPOSRANGE) /* GET(cc, 1) is used as the distance to the opcode after the class. */
- cc = compile_iterator_matchingpath(common, cc, parent);
- else
- cc = compile_char1_matchingpath(common, *cc, cc + 1, parent->top != NULL ? &parent->top->nextbacktracks : &parent->topbacktracks, TRUE);
- break;
- #endif
- case OP_REF:
- case OP_REFI:
- if (cc[1 + IMM2_SIZE] >= OP_CRSTAR && cc[1 + IMM2_SIZE] <= OP_CRPOSRANGE)
- cc = compile_ref_iterator_matchingpath(common, cc, parent);
- else
- {
- compile_ref_matchingpath(common, cc, parent->top != NULL ? &parent->top->nextbacktracks : &parent->topbacktracks, TRUE, FALSE);
- cc += 1 + IMM2_SIZE;
- }
- break;
- case OP_DNREF:
- case OP_DNREFI:
- if (cc[1 + 2 * IMM2_SIZE] >= OP_CRSTAR && cc[1 + 2 * IMM2_SIZE] <= OP_CRPOSRANGE)
- cc = compile_ref_iterator_matchingpath(common, cc, parent);
- else
- {
- compile_dnref_search(common, cc, parent->top != NULL ? &parent->top->nextbacktracks : &parent->topbacktracks);
- compile_ref_matchingpath(common, cc, parent->top != NULL ? &parent->top->nextbacktracks : &parent->topbacktracks, TRUE, FALSE);
- cc += 1 + 2 * IMM2_SIZE;
- }
- break;
- case OP_RECURSE:
- cc = compile_recurse_matchingpath(common, cc, parent);
- break;
- case OP_CALLOUT:
- case OP_CALLOUT_STR:
- cc = compile_callout_matchingpath(common, cc, parent);
- break;
- case OP_ASSERT:
- case OP_ASSERT_NOT:
- case OP_ASSERTBACK:
- case OP_ASSERTBACK_NOT:
- PUSH_BACKTRACK_NOVALUE(sizeof(assert_backtrack), cc);
- cc = compile_assert_matchingpath(common, cc, BACKTRACK_AS(assert_backtrack), FALSE);
- break;
- case OP_BRAMINZERO: /* Only the saved state is pushed here; cc jumps to bracketend(cc + 1). */
- PUSH_BACKTRACK_NOVALUE(sizeof(braminzero_backtrack), cc);
- cc = bracketend(cc + 1);
- if (*(cc - 1 - LINK_SIZE) != OP_KETRMIN)
- {
- allocate_stack(common, 1);
- OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
- }
- else
- {
- allocate_stack(common, 2); /* OP_KETRMIN ending: a zero word at STACK(0) plus STR_PTR at STACK(1). */
- OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
- OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), STR_PTR, 0);
- }
- BACKTRACK_AS(braminzero_backtrack)->matchingpath = LABEL();
- count_match(common);
- break;
- case OP_ONCE:
- case OP_BRA:
- case OP_CBRA:
- case OP_COND:
- case OP_SBRA:
- case OP_SCBRA:
- case OP_SCOND:
- cc = compile_bracket_matchingpath(common, cc, parent);
- break;
- case OP_BRAZERO:
- if (cc[1] > OP_ASSERTBACK_NOT) /* Opcodes above OP_ASSERTBACK_NOT go to the bracket helper, the rest to the assertion helper. */
- cc = compile_bracket_matchingpath(common, cc, parent);
- else
- {
- PUSH_BACKTRACK_NOVALUE(sizeof(assert_backtrack), cc);
- cc = compile_assert_matchingpath(common, cc, BACKTRACK_AS(assert_backtrack), FALSE);
- }
- break;
- case OP_BRAPOS:
- case OP_CBRAPOS:
- case OP_SBRAPOS:
- case OP_SCBRAPOS:
- case OP_BRAPOSZERO:
- cc = compile_bracketpos_matchingpath(common, cc, parent);
- break;
- case OP_MARK:
- PUSH_BACKTRACK_NOVALUE(sizeof(backtrack_common), cc);
- SLJIT_ASSERT(common->mark_ptr != 0);
- OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), common->mark_ptr); /* TMP2 = previous mark value. */
- allocate_stack(common, common->has_skip_arg ? 5 : 1); /* One word for the old mark; four more when has_skip_arg is set. */
- OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
- OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(common->has_skip_arg ? 4 : 0), TMP2, 0);
- OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, (sljit_sw)(cc + 2)); /* New mark: address cc + 2 inside the compiled pattern. */
- OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->mark_ptr, TMP2, 0);
- OP1(SLJIT_MOV, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, mark_ptr), TMP2, 0);
- if (common->has_skip_arg)
- {
- OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr); /* Entry written below: previous control head, type_mark, mark address, STR_PTR. */
- OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, STACK_TOP, 0);
- OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_IMM, type_mark);
- OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(2), SLJIT_IMM, (sljit_sw)(cc + 2));
- OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(3), STR_PTR, 0);
- OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP1, 0);
- }
- cc += 1 + 2 + cc[1];
- break;
- case OP_PRUNE:
- case OP_PRUNE_ARG:
- case OP_SKIP:
- case OP_SKIP_ARG:
- case OP_THEN:
- case OP_THEN_ARG:
- case OP_COMMIT:
- cc = compile_control_verb_matchingpath(common, cc, parent);
- break;
- case OP_FAIL:
- case OP_ACCEPT:
- case OP_ASSERT_ACCEPT:
- cc = compile_fail_accept_matchingpath(common, cc, parent);
- break;
- case OP_CLOSE:
- cc = compile_close_matchingpath(common, cc);
- break;
- case OP_SKIPZERO: /* No code is emitted; cc advances to bracketend(cc + 1). */
- cc = bracketend(cc + 1);
- break;
- default:
- SLJIT_UNREACHABLE();
- return;
- }
- if (cc == NULL) /* A helper returned NULL, presumably after a compiler error - confirm. */
- return;
- }
- if (has_then_trap)
- {
- /* Head item on backtrack. */
- PUSH_BACKTRACK_NOVALUE(sizeof(then_trap_backtrack), cc);
- BACKTRACK_AS(then_trap_backtrack)->common.cc = then_trap_opcode;
- BACKTRACK_AS(then_trap_backtrack)->then_trap = common->then_trap;
- common->then_trap = save_then_trap;
- }
- SLJIT_ASSERT(cc == ccend);
- }
- #undef PUSH_BACKTRACK /* Undefine the helper macros used by the matching-path functions above. */
- #undef PUSH_BACKTRACK_NOVALUE
- #undef BACKTRACK_AS
- /* Compiles the backtracking path of the given record and returns from the
-    enclosing void function when the sljit compiler reports an error.
-    Expects locals named common and compiler to be in scope at the use site.
- */ #define COMPILE_BACKTRACKINGPATH(current) \
- do \
- { \
- compile_backtrackingpath(common, (current)); \
- if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler))) \
- return; \
- } \
- while (0)
- #define CURRENT_AS(type) ((type *)current) /* Casts the local variable named current to a pointer to type. */
- /* Emits the backtracking path for the iterator opcode at current->cc.
-    The iterator state is kept either in stack words (STACK(0), STACK(1)) when
-    the opcode has no private data, or in the SLJIT_SP-relative private data
-    area otherwise; base, offset0 and offset1 hide the difference.
-    OP_STAR and OP_UPTO step STR_PTR back and re-enter the matching path;
-    OP_MINSTAR, OP_MINUPTO and OP_MINQUERY try to match one more item first.
-    OP_EXACT and the possessive forms emit nothing of their own.
-    In every case the pending current->topbacktracks jumps are finally bound
-    to the end of the emitted code.
- */ static void compile_iterator_backtrackingpath(compiler_common *common, struct backtrack_common *current)
- {
- DEFINE_COMPILER;
- PCRE2_SPTR cc = current->cc;
- PCRE2_UCHAR opcode;
- PCRE2_UCHAR type;
- sljit_u32 max = 0, exact;
- struct sljit_label *label = NULL;
- struct sljit_jump *jump = NULL;
- jump_list *jumplist = NULL;
- PCRE2_SPTR end;
- int private_data_ptr = PRIVATE_DATA(cc);
- int base = (private_data_ptr == 0) ? SLJIT_MEM1(STACK_TOP) : SLJIT_MEM1(SLJIT_SP); /* No private data: the state lives on the stack. */
- int offset0 = (private_data_ptr == 0) ? STACK(0) : private_data_ptr;
- int offset1 = (private_data_ptr == 0) ? STACK(1) : private_data_ptr + (int)sizeof(sljit_sw);
- cc = get_iterator_parameters(common, cc, &opcode, &type, &max, &exact, &end);
- switch(opcode)
- {
- case OP_STAR:
- case OP_UPTO:
- if (type == OP_ANYNL || type == OP_EXTUNI)
- {
- SLJIT_ASSERT(private_data_ptr == 0);
- set_jumps(CURRENT_AS(char_iterator_backtrack)->u.backtracks, LABEL());
- OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0)); /* Pop one saved position per backtrack. */
- free_stack(common, 1);
- CMPTO(SLJIT_NOT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0, CURRENT_AS(char_iterator_backtrack)->matchingpath); /* A zero word falls through instead of resuming. */
- }
- else
- {
- if (CURRENT_AS(char_iterator_backtrack)->u.charpos.enabled) /* Scan backwards for u.charpos.chr instead of retrying at every position. */
- {
- OP1(SLJIT_MOV, STR_PTR, 0, base, offset0);
- OP1(SLJIT_MOV, TMP2, 0, base, offset1); /* TMP2 = lower limit for STR_PTR. */
- OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
- jump = CMP(SLJIT_LESS_EQUAL, STR_PTR, 0, TMP2, 0);
- label = LABEL();
- OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-1));
- OP1(SLJIT_MOV, base, offset0, STR_PTR, 0);
- if (CURRENT_AS(char_iterator_backtrack)->u.charpos.othercasebit != 0)
- OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, CURRENT_AS(char_iterator_backtrack)->u.charpos.othercasebit);
- CMPTO(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, CURRENT_AS(char_iterator_backtrack)->u.charpos.chr, CURRENT_AS(char_iterator_backtrack)->matchingpath);
- skip_char_back(common);
- CMPTO(SLJIT_GREATER, STR_PTR, 0, TMP2, 0, label); /* Keep scanning while STR_PTR is above the limit. */
- }
- else
- {
- OP1(SLJIT_MOV, STR_PTR, 0, base, offset0);
- jump = CMP(SLJIT_LESS_EQUAL, STR_PTR, 0, base, offset1); /* Give up once STR_PTR is not above the value stored at offset1. */
- skip_char_back(common);
- OP1(SLJIT_MOV, base, offset0, STR_PTR, 0);
- JUMPTO(SLJIT_JUMP, CURRENT_AS(char_iterator_backtrack)->matchingpath);
- }
- JUMPHERE(jump);
- if (private_data_ptr == 0)
- free_stack(common, 2);
- }
- break;
- case OP_MINSTAR:
- OP1(SLJIT_MOV, STR_PTR, 0, base, offset0);
- compile_char1_matchingpath(common, type, cc, &jumplist, TRUE); /* Try one more item; a failure lands on jumplist. */
- OP1(SLJIT_MOV, base, offset0, STR_PTR, 0);
- JUMPTO(SLJIT_JUMP, CURRENT_AS(char_iterator_backtrack)->matchingpath);
- set_jumps(jumplist, LABEL());
- if (private_data_ptr == 0)
- free_stack(common, 1);
- break;
- case OP_MINUPTO:
- OP1(SLJIT_MOV, TMP1, 0, base, offset1); /* TMP1 = counter stored at offset1. */
- OP1(SLJIT_MOV, STR_PTR, 0, base, offset0);
- OP2(SLJIT_SUB | SLJIT_SET_Z, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
- add_jump(compiler, &jumplist, JUMP(SLJIT_ZERO)); /* Counter reached zero: no further attempt. */
- OP1(SLJIT_MOV, base, offset1, TMP1, 0);
- compile_char1_matchingpath(common, type, cc, &jumplist, TRUE);
- OP1(SLJIT_MOV, base, offset0, STR_PTR, 0);
- JUMPTO(SLJIT_JUMP, CURRENT_AS(char_iterator_backtrack)->matchingpath);
- set_jumps(jumplist, LABEL());
- if (private_data_ptr == 0)
- free_stack(common, 2);
- break;
- case OP_QUERY:
- OP1(SLJIT_MOV, STR_PTR, 0, base, offset0);
- OP1(SLJIT_MOV, base, offset0, SLJIT_IMM, 0); /* Clear the saved position so that it is used only once. */
- CMPTO(SLJIT_NOT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0, CURRENT_AS(char_iterator_backtrack)->matchingpath);
- jump = JUMP(SLJIT_JUMP);
- set_jumps(CURRENT_AS(char_iterator_backtrack)->u.backtracks, LABEL()); /* Separate entry for the u.backtracks jumps. */
- OP1(SLJIT_MOV, STR_PTR, 0, base, offset0);
- OP1(SLJIT_MOV, base, offset0, SLJIT_IMM, 0);
- JUMPTO(SLJIT_JUMP, CURRENT_AS(char_iterator_backtrack)->matchingpath);
- JUMPHERE(jump);
- if (private_data_ptr == 0)
- free_stack(common, 1);
- break;
- case OP_MINQUERY:
- OP1(SLJIT_MOV, STR_PTR, 0, base, offset0);
- OP1(SLJIT_MOV, base, offset0, SLJIT_IMM, 0);
- jump = CMP(SLJIT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0); /* A zero saved position skips the extra attempt. */
- compile_char1_matchingpath(common, type, cc, &jumplist, TRUE);
- JUMPTO(SLJIT_JUMP, CURRENT_AS(char_iterator_backtrack)->matchingpath);
- set_jumps(jumplist, LABEL());
- JUMPHERE(jump);
- if (private_data_ptr == 0)
- free_stack(common, 1);
- break;
- case OP_EXACT:
- case OP_POSSTAR:
- case OP_POSQUERY:
- case OP_POSUPTO:
- break;
- default:
- SLJIT_UNREACHABLE();
- break;
- }
- set_jumps(current->topbacktracks, LABEL());
- }
- /* Emits the backtracking path for a repeated back reference. The repeat
-    opcode is read at offset 1 + IMM2_SIZE for OP_REF and OP_REFI and at
-    offset 1 + 2 * IMM2_SIZE for any other opcode at current->cc.
-    An even repeat opcode is treated as the maximizing case: the saved STR_PTR
-    is popped and a nonzero value resumes the matching path.
-    An odd one is treated as minimizing: STR_PTR is reloaded without popping,
-    a nonzero value resumes the matching path, and otherwise the stack words
-    of the iterator are released.
- */ static SLJIT_INLINE void compile_ref_iterator_backtrackingpath(compiler_common *common, struct backtrack_common *current)
- {
- DEFINE_COMPILER;
- PCRE2_SPTR cc = current->cc;
- BOOL ref = (*cc == OP_REF || *cc == OP_REFI);
- PCRE2_UCHAR type;
- type = cc[ref ? 1 + IMM2_SIZE : 1 + 2 * IMM2_SIZE];
- if ((type & 0x1) == 0)
- {
- /* Maximize case. */
- set_jumps(current->topbacktracks, LABEL());
- OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
- free_stack(common, 1);
- CMPTO(SLJIT_NOT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0, CURRENT_AS(ref_iterator_backtrack)->matchingpath);
- return;
- }
- OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
- CMPTO(SLJIT_NOT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0, CURRENT_AS(ref_iterator_backtrack)->matchingpath);
- set_jumps(current->topbacktracks, LABEL());
- free_stack(common, ref ? 2 : 3); /* Two words for OP_REF and OP_REFI, three otherwise. */
- }
- /* Emits the backtracking path for a recurse_backtrack record.
-    Not inlined: a fast call to the backtrack entry of the recursed pattern is
-    emitted (directly when entry->backtrack_label exists, otherwise queued on
-    entry->backtrack_calls); after the call a nonzero TMP1 resumes the
-    matching path.
-    Inlined: the backtracking path of current->top is compiled in place.
-    Finally the pending current->topbacktracks jumps are bound here.
- */ static SLJIT_INLINE void compile_recurse_backtrackingpath(compiler_common *common, struct backtrack_common *current)
- {
- DEFINE_COMPILER;
- recurse_entry *entry;
- if (!CURRENT_AS(recurse_backtrack)->inlined_pattern)
- {
- entry = CURRENT_AS(recurse_backtrack)->entry;
- if (entry->backtrack_label == NULL)
- add_jump(compiler, &entry->backtrack_calls, JUMP(SLJIT_FAST_CALL));
- else
- JUMPTO(SLJIT_FAST_CALL, entry->backtrack_label);
- CMPTO(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, 0, CURRENT_AS(recurse_backtrack)->matchingpath);
- }
- else
- compile_backtrackingpath(common, current->top);
- set_jumps(current->topbacktracks, LABEL());
- }
- /* Emits the backtracking path for an assert_backtrack record whose opcode
-    may be preceded by OP_BRAZERO; OP_BRAMINZERO is not expected (asserted).
-    With OP_BRAZERO the saved STR_PTR is reloaded from STACK(0); a nonzero
-    value leads back to the matching path after that slot has been set to 0.
-    For OP_ASSERT and OP_ASSERTBACK with framesize >= 0 the frame is reverted
-    through common->revertframes and the private data slot is reloaded from
-    STACK(-2) of that frame.
-    The pending current->topbacktracks jumps are bound on every path except
-    the early return for negative assertions under OP_BRAZERO.
- */ static void compile_assert_backtrackingpath(compiler_common *common, struct backtrack_common *current)
- {
- DEFINE_COMPILER;
- PCRE2_SPTR cc = current->cc;
- PCRE2_UCHAR bra = OP_BRA;
- struct sljit_jump *brajump = NULL;
- SLJIT_ASSERT(*cc != OP_BRAMINZERO);
- if (*cc == OP_BRAZERO)
- {
- bra = *cc;
- cc++;
- }
- if (bra == OP_BRAZERO)
- {
- SLJIT_ASSERT(current->topbacktracks == NULL);
- OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
- }
- if (CURRENT_AS(assert_backtrack)->framesize < 0) /* No frame to revert. */
- {
- set_jumps(current->topbacktracks, LABEL());
- if (bra == OP_BRAZERO)
- {
- OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
- CMPTO(SLJIT_NOT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0, CURRENT_AS(assert_backtrack)->matchingpath);
- free_stack(common, 1);
- }
- return;
- }
- if (bra == OP_BRAZERO)
- {
- if (*cc == OP_ASSERT_NOT || *cc == OP_ASSERTBACK_NOT) /* Negative assertions take the same short path as the frameless case. */
- {
- OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
- CMPTO(SLJIT_NOT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0, CURRENT_AS(assert_backtrack)->matchingpath);
- free_stack(common, 1);
- return;
- }
- free_stack(common, 1);
- brajump = CMP(SLJIT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0); /* Zero saved STR_PTR: skip the frame handling below. */
- }
- if (*cc == OP_ASSERT || *cc == OP_ASSERTBACK)
- {
- OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), CURRENT_AS(assert_backtrack)->private_data_ptr);
- add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL));
- OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(-2));
- OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, (CURRENT_AS(assert_backtrack)->framesize - 1) * sizeof(sljit_sw));
- OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), CURRENT_AS(assert_backtrack)->private_data_ptr, TMP1, 0);
- set_jumps(current->topbacktracks, LABEL());
- }
- else
- set_jumps(current->topbacktracks, LABEL());
- if (bra == OP_BRAZERO)
- {
- /* We know there is enough place on the stack. */
- OP2(SLJIT_SUB, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, sizeof(sljit_sw));
- OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
- JUMPTO(SLJIT_JUMP, CURRENT_AS(assert_backtrack)->matchingpath);
- JUMPHERE(brajump);
- }
- }
- static void compile_bracket_backtrackingpath(compiler_common *common, struct backtrack_common *current)
- {
- DEFINE_COMPILER;
- int opcode, stacksize, alt_count, alt_max;
- int offset = 0;
- int private_data_ptr = CURRENT_AS(bracket_backtrack)->private_data_ptr;
- int repeat_ptr = 0, repeat_type = 0, repeat_count = 0;
- PCRE2_SPTR cc = current->cc;
- PCRE2_SPTR ccbegin;
- PCRE2_SPTR ccprev;
- PCRE2_UCHAR bra = OP_BRA;
- PCRE2_UCHAR ket;
- assert_backtrack *assert;
- sljit_uw *next_update_addr = NULL;
- BOOL has_alternatives;
- BOOL needs_control_head = FALSE;
- struct sljit_jump *brazero = NULL;
- struct sljit_jump *alt1 = NULL;
- struct sljit_jump *alt2 = NULL;
- struct sljit_jump *once = NULL;
- struct sljit_jump *cond = NULL;
- struct sljit_label *rmin_label = NULL;
- struct sljit_label *exact_label = NULL;
- if (*cc == OP_BRAZERO || *cc == OP_BRAMINZERO)
- {
- bra = *cc;
- cc++;
- }
- opcode = *cc;
- ccbegin = bracketend(cc) - 1 - LINK_SIZE;
- ket = *ccbegin;
- if (ket == OP_KET && PRIVATE_DATA(ccbegin) != 0)
- {
- repeat_ptr = PRIVATE_DATA(ccbegin);
- repeat_type = PRIVATE_DATA(ccbegin + 2);
- repeat_count = PRIVATE_DATA(ccbegin + 3);
- SLJIT_ASSERT(repeat_type != 0 && repeat_count != 0);
- if (repeat_type == OP_UPTO)
- ket = OP_KETRMAX;
- if (repeat_type == OP_MINUPTO)
- ket = OP_KETRMIN;
- }
- ccbegin = cc;
- cc += GET(cc, 1);
- has_alternatives = *cc == OP_ALT;
- if (SLJIT_UNLIKELY(opcode == OP_COND) || SLJIT_UNLIKELY(opcode == OP_SCOND))
- has_alternatives = (ccbegin[1 + LINK_SIZE] >= OP_ASSERT && ccbegin[1 + LINK_SIZE] <= OP_ASSERTBACK_NOT) || CURRENT_AS(bracket_backtrack)->u.condfailed != NULL;
- if (opcode == OP_CBRA || opcode == OP_SCBRA)
- offset = (GET2(ccbegin, 1 + LINK_SIZE)) << 1;
- if (SLJIT_UNLIKELY(opcode == OP_COND) && (*cc == OP_KETRMAX || *cc == OP_KETRMIN))
- opcode = OP_SCOND;
- alt_max = has_alternatives ? no_alternatives(ccbegin) : 0;
- /* Decoding the needs_control_head in framesize. */
- if (opcode == OP_ONCE)
- {
- needs_control_head = (CURRENT_AS(bracket_backtrack)->u.framesize & 0x1) != 0;
- CURRENT_AS(bracket_backtrack)->u.framesize >>= 1;
- }
- if (ket != OP_KET && repeat_type != 0)
- {
- /* TMP1 is used in OP_KETRMIN below. */
- OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
- free_stack(common, 1);
- if (repeat_type == OP_UPTO)
- OP2(SLJIT_ADD, SLJIT_MEM1(SLJIT_SP), repeat_ptr, TMP1, 0, SLJIT_IMM, 1);
- else
- OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), repeat_ptr, TMP1, 0);
- }
- if (ket == OP_KETRMAX)
- {
- if (bra == OP_BRAZERO)
- {
- OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
- free_stack(common, 1);
- brazero = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, 0);
- }
- }
- else if (ket == OP_KETRMIN)
- {
- if (bra != OP_BRAMINZERO)
- {
- OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
- if (repeat_type != 0)
- {
- /* TMP1 was set a few lines above. */
- CMPTO(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, 0, CURRENT_AS(bracket_backtrack)->recursive_matchingpath);
- /* Drop STR_PTR for non-greedy plus quantifier. */
- if (opcode != OP_ONCE)
- free_stack(common, 1);
- }
- else if (opcode >= OP_SBRA || opcode == OP_ONCE)
- {
- /* Checking zero-length iteration. */
- if (opcode != OP_ONCE || CURRENT_AS(bracket_backtrack)->u.framesize < 0)
- CMPTO(SLJIT_NOT_EQUAL, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr, CURRENT_AS(bracket_backtrack)->recursive_matchingpath);
- else
- {
- OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
- CMPTO(SLJIT_NOT_EQUAL, STR_PTR, 0, SLJIT_MEM1(TMP1), STACK(-CURRENT_AS(bracket_backtrack)->u.framesize - 2), CURRENT_AS(bracket_backtrack)->recursive_matchingpath);
- }
- /* Drop STR_PTR for non-greedy plus quantifier. */
- if (opcode != OP_ONCE)
- free_stack(common, 1);
- }
- else
- JUMPTO(SLJIT_JUMP, CURRENT_AS(bracket_backtrack)->recursive_matchingpath);
- }
- rmin_label = LABEL();
- if (repeat_type != 0)
- OP2(SLJIT_ADD, SLJIT_MEM1(SLJIT_SP), repeat_ptr, SLJIT_MEM1(SLJIT_SP), repeat_ptr, SLJIT_IMM, 1);
- }
- else if (bra == OP_BRAZERO)
- {
- OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
- free_stack(common, 1);
- brazero = CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, 0);
- }
- else if (repeat_type == OP_EXACT)
- {
- OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), repeat_ptr, SLJIT_IMM, 1);
- exact_label = LABEL();
- }
- if (offset != 0)
- {
- if (common->capture_last_ptr != 0)
- {
- SLJIT_ASSERT(common->optimized_cbracket[offset >> 1] == 0);
- OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
- OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), STACK(1));
- OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->capture_last_ptr, TMP1, 0);
- OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(2));
- free_stack(common, 3);
- OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset), TMP2, 0);
- OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1), TMP1, 0);
- }
- else if (common->optimized_cbracket[offset >> 1] == 0)
- {
- OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
- OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), STACK(1));
- free_stack(common, 2);
- OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset), TMP1, 0);
- OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1), TMP2, 0);
- }
- }
- if (SLJIT_UNLIKELY(opcode == OP_ONCE))
- {
- if (CURRENT_AS(bracket_backtrack)->u.framesize >= 0)
- {
- OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
- add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL));
- OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, (CURRENT_AS(bracket_backtrack)->u.framesize - 1) * sizeof(sljit_sw));
- }
- once = JUMP(SLJIT_JUMP);
- }
- else if (SLJIT_UNLIKELY(opcode == OP_COND) || SLJIT_UNLIKELY(opcode == OP_SCOND))
- {
- if (has_alternatives)
- {
- /* Always exactly one alternative. */
- OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
- free_stack(common, 1);
- alt_max = 2;
- alt1 = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, sizeof(sljit_uw));
- }
- }
- else if (has_alternatives)
- {
- OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
- free_stack(common, 1);
- if (alt_max > 4)
- {
- /* Table jump if alt_max is greater than 4. */
- next_update_addr = allocate_read_only_data(common, alt_max * sizeof(sljit_uw));
- if (SLJIT_UNLIKELY(next_update_addr == NULL))
- return;
- sljit_emit_ijump(compiler, SLJIT_JUMP, SLJIT_MEM1(TMP1), (sljit_sw)next_update_addr);
- add_label_addr(common, next_update_addr++);
- }
- else
- {
- if (alt_max == 4)
- alt2 = CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 2 * sizeof(sljit_uw));
- alt1 = CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, sizeof(sljit_uw));
- }
- }
- COMPILE_BACKTRACKINGPATH(current->top);
- if (current->topbacktracks)
- set_jumps(current->topbacktracks, LABEL());
- if (SLJIT_UNLIKELY(opcode == OP_COND) || SLJIT_UNLIKELY(opcode == OP_SCOND))
- {
- /* Conditional block always has at most one alternative. */
- if (ccbegin[1 + LINK_SIZE] >= OP_ASSERT && ccbegin[1 + LINK_SIZE] <= OP_ASSERTBACK_NOT)
- {
- SLJIT_ASSERT(has_alternatives);
- assert = CURRENT_AS(bracket_backtrack)->u.assert;
- if (assert->framesize >= 0 && (ccbegin[1 + LINK_SIZE] == OP_ASSERT || ccbegin[1 + LINK_SIZE] == OP_ASSERTBACK))
- {
- OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), assert->private_data_ptr);
- add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL));
- OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(-2));
- OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, (assert->framesize - 1) * sizeof(sljit_sw));
- OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), assert->private_data_ptr, TMP1, 0);
- }
- cond = JUMP(SLJIT_JUMP);
- set_jumps(CURRENT_AS(bracket_backtrack)->u.assert->condfailed, LABEL());
- }
- else if (CURRENT_AS(bracket_backtrack)->u.condfailed != NULL)
- {
- SLJIT_ASSERT(has_alternatives);
- cond = JUMP(SLJIT_JUMP);
- set_jumps(CURRENT_AS(bracket_backtrack)->u.condfailed, LABEL());
- }
- else
- SLJIT_ASSERT(!has_alternatives);
- }
- if (has_alternatives)
- {
- alt_count = sizeof(sljit_uw);
- do
- {
- current->top = NULL;
- current->topbacktracks = NULL;
- current->nextbacktracks = NULL;
- /* Conditional blocks always have an additional alternative, even if it is empty. */
- if (*cc == OP_ALT)
- {
- ccprev = cc + 1 + LINK_SIZE;
- cc += GET(cc, 1);
- if (opcode != OP_COND && opcode != OP_SCOND)
- {
- if (opcode != OP_ONCE)
- {
- if (private_data_ptr != 0)
- OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
- else
- OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
- }
- else
- OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(needs_control_head ? 1 : 0));
- }
- compile_matchingpath(common, ccprev, cc, current);
- if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
- return;
- }
- /* Instructions after the current alternative is successfully matched. */
- /* There is a similar code in compile_bracket_matchingpath. */
- if (opcode == OP_ONCE)
- match_once_common(common, ket, CURRENT_AS(bracket_backtrack)->u.framesize, private_data_ptr, has_alternatives, needs_control_head);
- stacksize = 0;
- if (repeat_type == OP_MINUPTO)
- {
- /* We need to preserve the counter. TMP2 will be used below. */
- OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), repeat_ptr);
- stacksize++;
- }
- if (ket != OP_KET || bra != OP_BRA)
- stacksize++;
- if (offset != 0)
- {
- if (common->capture_last_ptr != 0)
- stacksize++;
- if (common->optimized_cbracket[offset >> 1] == 0)
- stacksize += 2;
- }
- if (opcode != OP_ONCE)
- stacksize++;
- if (stacksize > 0)
- allocate_stack(common, stacksize);
- stacksize = 0;
- if (repeat_type == OP_MINUPTO)
- {
- /* TMP2 was set above. */
- OP2(SLJIT_SUB, SLJIT_MEM1(STACK_TOP), STACK(stacksize), TMP2, 0, SLJIT_IMM, 1);
- stacksize++;
- }
- if (ket != OP_KET || bra != OP_BRA)
- {
- if (ket != OP_KET)
- OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), STR_PTR, 0);
- else
- OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), SLJIT_IMM, 0);
- stacksize++;
- }
- if (offset != 0)
- stacksize = match_capture_common(common, stacksize, offset, private_data_ptr);
- if (opcode != OP_ONCE)
- OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), SLJIT_IMM, alt_count);
- if (offset != 0 && ket == OP_KETRMAX && common->optimized_cbracket[offset >> 1] != 0)
- {
- /* If ket is not OP_KETRMAX, this code path is executed after the jump to alternative_matchingpath. */
- SLJIT_ASSERT(private_data_ptr == OVECTOR(offset + 0));
- OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1), STR_PTR, 0);
- }
- JUMPTO(SLJIT_JUMP, CURRENT_AS(bracket_backtrack)->alternative_matchingpath);
- if (opcode != OP_ONCE)
- {
- if (alt_max > 4)
- add_label_addr(common, next_update_addr++);
- else
- {
- if (alt_count != 2 * sizeof(sljit_uw))
- {
- JUMPHERE(alt1);
- if (alt_max == 3 && alt_count == sizeof(sljit_uw))
- alt2 = CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 2 * sizeof(sljit_uw));
- }
- else
- {
- JUMPHERE(alt2);
- if (alt_max == 4)
- alt1 = CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 3 * sizeof(sljit_uw));
- }
- }
- alt_count += sizeof(sljit_uw);
- }
- COMPILE_BACKTRACKINGPATH(current->top);
- if (current->topbacktracks)
- set_jumps(current->topbacktracks, LABEL());
- SLJIT_ASSERT(!current->nextbacktracks);
- }
- while (*cc == OP_ALT);
- if (cond != NULL)
- {
- SLJIT_ASSERT(opcode == OP_COND || opcode == OP_SCOND);
- assert = CURRENT_AS(bracket_backtrack)->u.assert;
- if ((ccbegin[1 + LINK_SIZE] == OP_ASSERT_NOT || ccbegin[1 + LINK_SIZE] == OP_ASSERTBACK_NOT) && assert->framesize >= 0)
- {
- OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), assert->private_data_ptr);
- add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL));
- OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(-2));
- OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, (assert->framesize - 1) * sizeof(sljit_sw));
- OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), assert->private_data_ptr, TMP1, 0);
- }
- JUMPHERE(cond);
- }
- /* Free the STR_PTR. */
- if (private_data_ptr == 0)
- free_stack(common, 1);
- }
- if (offset != 0)
- {
- /* Using both tmp register is better for instruction scheduling. */
- if (common->optimized_cbracket[offset >> 1] != 0)
- {
- OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
- OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), STACK(1));
- free_stack(common, 2);
- OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset), TMP1, 0);
- OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1), TMP2, 0);
- }
- else
- {
- OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
- free_stack(common, 1);
- OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, TMP1, 0);
- }
- }
- else if (opcode == OP_SBRA || opcode == OP_SCOND)
- {
- OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, SLJIT_MEM1(STACK_TOP), STACK(0));
- free_stack(common, 1);
- }
- else if (opcode == OP_ONCE)
- {
- cc = ccbegin + GET(ccbegin, 1);
- stacksize = needs_control_head ? 1 : 0;
- if (CURRENT_AS(bracket_backtrack)->u.framesize >= 0)
- {
- /* Reset head and drop saved frame. */
- stacksize += CURRENT_AS(bracket_backtrack)->u.framesize + ((ket != OP_KET || *cc == OP_ALT) ? 2 : 1);
- }
- else if (ket == OP_KETRMAX || (*cc == OP_ALT && ket != OP_KETRMIN))
- {
- /* The STR_PTR must be released. */
- stacksize++;
- }
- if (stacksize > 0)
- free_stack(common, stacksize);
- JUMPHERE(once);
- /* Restore previous private_data_ptr */
- if (CURRENT_AS(bracket_backtrack)->u.framesize >= 0)
- OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, SLJIT_MEM1(STACK_TOP), STACK(-CURRENT_AS(bracket_backtrack)->u.framesize - 1));
- else if (ket == OP_KETRMIN)
- {
- OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(1));
- /* See the comment below. */
- free_stack(common, 2);
- OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, TMP1, 0);
- }
- }
- if (repeat_type == OP_EXACT)
- {
- OP2(SLJIT_ADD, TMP1, 0, SLJIT_MEM1(SLJIT_SP), repeat_ptr, SLJIT_IMM, 1);
- OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), repeat_ptr, TMP1, 0);
- CMPTO(SLJIT_LESS_EQUAL, TMP1, 0, SLJIT_IMM, repeat_count, exact_label);
- }
- else if (ket == OP_KETRMAX)
- {
- OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
- if (bra != OP_BRAZERO)
- free_stack(common, 1);
- CMPTO(SLJIT_NOT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0, CURRENT_AS(bracket_backtrack)->recursive_matchingpath);
- if (bra == OP_BRAZERO)
- {
- OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(1));
- JUMPTO(SLJIT_JUMP, CURRENT_AS(bracket_backtrack)->zero_matchingpath);
- JUMPHERE(brazero);
- free_stack(common, 1);
- }
- }
- else if (ket == OP_KETRMIN)
- {
- OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
- /* OP_ONCE removes everything in case of a backtrack, so we don't
- need to explicitly release the STR_PTR. The extra release would
- affect badly the free_stack(2) above. */
- if (opcode != OP_ONCE)
- free_stack(common, 1);
- CMPTO(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, 0, rmin_label);
- if (opcode == OP_ONCE)
- free_stack(common, bra == OP_BRAMINZERO ? 2 : 1);
- else if (bra == OP_BRAMINZERO)
- free_stack(common, 1);
- }
- else if (bra == OP_BRAZERO)
- {
- OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
- JUMPTO(SLJIT_JUMP, CURRENT_AS(bracket_backtrack)->zero_matchingpath);
- JUMPHERE(brazero);
- }
- }
/* Backtracking path for OP_BRAPOS, OP_CBRAPOS, OP_SBRAPOS, OP_SCBRAPOS and
   OP_BRAPOSZERO records (compile_backtrackingpath routes those opcodes here).

   common  - shared JIT compiler state.
   current - backtrack record, accessed as a bracketpos_backtrack.

   Two layouts are handled, selected by the record's framesize:
     framesize < 0  : for the capturing opcodes the two ovector slots of the
                      group (and the capture_last slot when it is in use) are
                      reloaded from STACK(0..2); afterwards `stacksize` slots
                      are released.
     framesize >= 0 : STACK_TOP is reloaded from the private_data_ptr local,
                      common->revertframes is invoked as a fast call, and the
                      private_data_ptr local is rewritten from the stack.

   NOTE(review): OP1/OP2/JUMP/JUMPHERE are macros defined outside this chunk;
   they are assumed to append instructions to `compiler` in call order, which
   makes the statement order below significant - confirm before reordering. */
- static SLJIT_INLINE void compile_bracketpos_backtrackingpath(compiler_common *common, struct backtrack_common *current)
- {
- DEFINE_COMPILER;
- int offset;
- struct sljit_jump *jump;
- if (CURRENT_AS(bracketpos_backtrack)->framesize < 0)
- {
- if (*current->cc == OP_CBRAPOS || *current->cc == OP_SCBRAPOS)
- {
/* The 2-byte value after the link field, doubled, indexes the first of the
   two OVECTOR slots that are restored below. */
- offset = (GET2(current->cc, 1 + LINK_SIZE)) << 1;
- OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
- OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), STACK(1));
- OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset), TMP1, 0);
/* TMP1 has been stored already, so it is reused for the third saved value. */
- if (common->capture_last_ptr != 0)
- OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(2));
- OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1), TMP2, 0);
- if (common->capture_last_ptr != 0)
- OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->capture_last_ptr, TMP1, 0);
- }
/* Jumps collected in topbacktracks land after the restore above, so they
   only release the stack slots. */
- set_jumps(current->topbacktracks, LABEL());
- free_stack(common, CURRENT_AS(bracketpos_backtrack)->stacksize);
- return;
- }
/* framesize >= 0: reload STACK_TOP from the local, call the shared
   revertframes routine, then add (framesize - 1) words to STACK_TOP. */
- OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), CURRENT_AS(bracketpos_backtrack)->private_data_ptr);
- add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL));
- OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, (CURRENT_AS(bracketpos_backtrack)->framesize - 1) * sizeof(sljit_sw));
- if (current->topbacktracks)
- {
/* The fall-through path jumps over the free_stack; only the topbacktracks
   entry executes it. */
- jump = JUMP(SLJIT_JUMP);
- set_jumps(current->topbacktracks, LABEL());
- /* Drop the stack frame. */
- free_stack(common, CURRENT_AS(bracketpos_backtrack)->stacksize);
- JUMPHERE(jump);
- }
/* Both paths join here: rewrite the private_data_ptr local from the slot at
   STACK(-framesize - 1). */
- OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), CURRENT_AS(bracketpos_backtrack)->private_data_ptr, SLJIT_MEM1(STACK_TOP), STACK(-CURRENT_AS(bracketpos_backtrack)->framesize - 1));
- }
/* Backtracking path for an OP_BRAMINZERO record.

   common  - shared JIT compiler state.
   current - backtrack record; its top/topbacktracks/nextbacktracks fields are
             cleared first and are expected to be empty again on exit.

   The opcode that follows OP_BRAMINZERO (current->cc[1]) selects what is
   compiled here: values above OP_ASSERTBACK_NOT take the bracket route
   (matching path followed by its backtracking path), all other values take
   the assertion route. compile_backtrackingpath applies the same test for
   OP_BRAZERO. */
- static SLJIT_INLINE void compile_braminzero_backtrackingpath(compiler_common *common, struct backtrack_common *current)
- {
- assert_backtrack backtrack;
- current->top = NULL;
- current->topbacktracks = NULL;
- current->nextbacktracks = NULL;
- if (current->cc[1] > OP_ASSERTBACK_NOT)
- {
- /* Manual call of compile_bracket_matchingpath and compile_bracket_backtrackingpath. */
- compile_bracket_matchingpath(common, current->cc, current);
- compile_bracket_backtrackingpath(common, current->top);
- }
- else
- {
/* A zeroed, stack-local assert_backtrack stands in for a real record; only
   its cc and matchingpath fields are filled in before the call. */
- memset(&backtrack, 0, sizeof(backtrack));
- backtrack.common.cc = current->cc;
- backtrack.matchingpath = CURRENT_AS(braminzero_backtrack)->matchingpath;
- /* Manual call of compile_assert_matchingpath. */
- compile_assert_matchingpath(common, current->cc, &backtrack, FALSE);
- }
- SLJIT_ASSERT(!current->nextbacktracks && !current->topbacktracks);
- }
/* Backtracking path for the control verbs OP_THEN, OP_THEN_ARG, OP_PRUNE,
   OP_PRUNE_ARG, OP_SKIP and OP_SKIP_ARG (dispatched from
   compile_backtrackingpath).

   common  - shared JIT compiler state.
   current - backtrack record; *current->cc is the verb opcode.

   Order of the checks:
     1. THEN / THEN_ARG with an active common->then_trap: search the chain
        that starts at control_head_ptr for the matching trap entry and jump
        to that trap's quit list.
     2. THEN / THEN_ARG without a trap, no local quit, inside a positive
        assertion: jump to positive_assertion_quit.
     3. local_quit_available: jump to the quit label / quit list.
     4. SKIP_ARG: call do_search_mark; a non-zero result becomes STR_PTR and
        control goes to reset_match.
     5. Otherwise STR_PTR is loaded from STACK(0) for OP_SKIP, or set to 0 for
        the remaining verbs, and control goes to reset_match. */
- static SLJIT_INLINE void compile_control_verb_backtrackingpath(compiler_common *common, struct backtrack_common *current)
- {
- DEFINE_COMPILER;
- PCRE2_UCHAR opcode = *current->cc;
- struct sljit_label *loop;
- struct sljit_jump *jump;
- if (opcode == OP_THEN || opcode == OP_THEN_ARG)
- {
- if (common->then_trap != NULL)
- {
- SLJIT_ASSERT(common->control_head_ptr != 0);
- OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr);
- OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, type_then_trap);
- OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, common->then_trap->start);
/* Chain walk: the initial jump skips the first link load so the head entry
   itself is tested. An entry matches when STACK(1) equals type_then_trap and
   STACK(2) equals then_trap->start; otherwise STACK(0) is followed. */
- jump = JUMP(SLJIT_JUMP);
- loop = LABEL();
- OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
- JUMPHERE(jump);
- CMPTO(SLJIT_NOT_EQUAL, SLJIT_MEM1(STACK_TOP), STACK(1), TMP1, 0, loop);
- CMPTO(SLJIT_NOT_EQUAL, SLJIT_MEM1(STACK_TOP), STACK(2), TMP2, 0, loop);
- add_jump(compiler, &common->then_trap->quit, JUMP(SLJIT_JUMP));
- return;
- }
- else if (!common->local_quit_available && common->in_positive_assertion)
- {
- add_jump(compiler, &common->positive_assertion_quit, JUMP(SLJIT_JUMP));
- return;
- }
- }
- if (common->local_quit_available)
- {
- /* Abort match with a fail. */
/* quit_label is used directly when it is already set; otherwise the jump is
   queued on common->quit. */
- if (common->quit_label == NULL)
- add_jump(compiler, &common->quit, JUMP(SLJIT_JUMP));
- else
- JUMPTO(SLJIT_JUMP, common->quit_label);
- return;
- }
- if (opcode == OP_SKIP_ARG)
- {
/* The assert pins the register assignment the two-argument call relies on.
   NOTE(review): current->cc + 2 is presumably the verb's mark name - confirm
   against do_search_mark. */
- SLJIT_ASSERT(common->control_head_ptr != 0 && TMP1 == SLJIT_R0 && STR_PTR == SLJIT_R1);
- OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr);
- OP1(SLJIT_MOV, SLJIT_R1, 0, SLJIT_IMM, (sljit_sw)(current->cc + 2));
- sljit_emit_icall(compiler, SLJIT_CALL, SLJIT_RET(SW) | SLJIT_ARG1(SW) | SLJIT_ARG2(SW), SLJIT_IMM, SLJIT_FUNC_OFFSET(do_search_mark));
- OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_R0, 0);
/* Only a non-zero result goes to reset_match; a zero result falls through to
   whatever follows this verb's backtracking code. */
- add_jump(compiler, &common->reset_match, CMP(SLJIT_NOT_EQUAL, SLJIT_R0, 0, SLJIT_IMM, 0));
- return;
- }
- if (opcode == OP_SKIP)
- OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
- else
- OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_IMM, 0);
- add_jump(compiler, &common->reset_match, JUMP(SLJIT_JUMP));
- }
/* Backtracking path for the virtual OP_THEN_TRAP opcode.

   common  - shared JIT compiler state.
   current - backtrack record, accessed as a then_trap_backtrack.

   When the record's then_trap field is set, the function only installs it as
   common->then_trap and returns. Otherwise it produces two entry paths that
   join before the final store:
     - normal backtrack: load the value at STACK(size - 3) and release `size`
       slots, where size = 3 + max(framesize, 0);
     - the record's quit list: optionally call revertframes and adjust
       STACK_TOP, load STACK(0), release 3 slots.
   In both cases the loaded value is written to the control_head_ptr local. */
- static SLJIT_INLINE void compile_then_trap_backtrackingpath(compiler_common *common, struct backtrack_common *current)
- {
- DEFINE_COMPILER;
- struct sljit_jump *jump;
- int size;
- if (CURRENT_AS(then_trap_backtrack)->then_trap)
- {
- common->then_trap = CURRENT_AS(then_trap_backtrack)->then_trap;
- return;
- }
/* A negative framesize contributes nothing; three slots are always present. */
- size = CURRENT_AS(then_trap_backtrack)->framesize;
- size = 3 + (size < 0 ? 0 : size);
- OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(size - 3));
- free_stack(common, size);
/* The normal path skips the quit-list handling below. */
- jump = JUMP(SLJIT_JUMP);
- set_jumps(CURRENT_AS(then_trap_backtrack)->quit, LABEL());
- /* STACK_TOP is set by THEN. */
- if (CURRENT_AS(then_trap_backtrack)->framesize >= 0)
- {
- add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL));
- OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, (CURRENT_AS(then_trap_backtrack)->framesize - 1) * sizeof(sljit_sw));
- }
- OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
- free_stack(common, 3);
- JUMPHERE(jump);
/* Join point: TMP1 holds the value loaded on whichever path was taken. */
- OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, TMP1, 0);
- }
/* Compiles the backtracking code for a chain of backtrack records.

   common  - shared JIT compiler state.
   current - first record to process; the chain is followed through ->prev
             until it is NULL.

   For every record, pending nextbacktracks jumps are bound to the current
   position, then the opcode at current->cc selects the handler. Opcodes not
   listed in the switch hit SLJIT_UNREACHABLE().

   common->then_trap is saved on entry and restored on exit, because
   compile_then_trap_backtrackingpath may replace it while the chain is
   processed. */
- static void compile_backtrackingpath(compiler_common *common, struct backtrack_common *current)
- {
- DEFINE_COMPILER;
- then_trap_backtrack *save_then_trap = common->then_trap;
- while (current)
- {
- if (current->nextbacktracks != NULL)
- set_jumps(current->nextbacktracks, LABEL());
- switch(*current->cc)
- {
/* Pop one saved value from the stack back into OVECTOR(0). */
- case OP_SET_SOM:
- OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
- free_stack(common, 1);
- OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(0), TMP1, 0);
- break;
/* All single-item repeat opcodes and the class opcodes share one handler. */
- case OP_STAR:
- case OP_MINSTAR:
- case OP_PLUS:
- case OP_MINPLUS:
- case OP_QUERY:
- case OP_MINQUERY:
- case OP_UPTO:
- case OP_MINUPTO:
- case OP_EXACT:
- case OP_POSSTAR:
- case OP_POSPLUS:
- case OP_POSQUERY:
- case OP_POSUPTO:
- case OP_STARI:
- case OP_MINSTARI:
- case OP_PLUSI:
- case OP_MINPLUSI:
- case OP_QUERYI:
- case OP_MINQUERYI:
- case OP_UPTOI:
- case OP_MINUPTOI:
- case OP_EXACTI:
- case OP_POSSTARI:
- case OP_POSPLUSI:
- case OP_POSQUERYI:
- case OP_POSUPTOI:
- case OP_NOTSTAR:
- case OP_NOTMINSTAR:
- case OP_NOTPLUS:
- case OP_NOTMINPLUS:
- case OP_NOTQUERY:
- case OP_NOTMINQUERY:
- case OP_NOTUPTO:
- case OP_NOTMINUPTO:
- case OP_NOTEXACT:
- case OP_NOTPOSSTAR:
- case OP_NOTPOSPLUS:
- case OP_NOTPOSQUERY:
- case OP_NOTPOSUPTO:
- case OP_NOTSTARI:
- case OP_NOTMINSTARI:
- case OP_NOTPLUSI:
- case OP_NOTMINPLUSI:
- case OP_NOTQUERYI:
- case OP_NOTMINQUERYI:
- case OP_NOTUPTOI:
- case OP_NOTMINUPTOI:
- case OP_NOTEXACTI:
- case OP_NOTPOSSTARI:
- case OP_NOTPOSPLUSI:
- case OP_NOTPOSQUERYI:
- case OP_NOTPOSUPTOI:
- case OP_TYPESTAR:
- case OP_TYPEMINSTAR:
- case OP_TYPEPLUS:
- case OP_TYPEMINPLUS:
- case OP_TYPEQUERY:
- case OP_TYPEMINQUERY:
- case OP_TYPEUPTO:
- case OP_TYPEMINUPTO:
- case OP_TYPEEXACT:
- case OP_TYPEPOSSTAR:
- case OP_TYPEPOSPLUS:
- case OP_TYPEPOSQUERY:
- case OP_TYPEPOSUPTO:
- case OP_CLASS:
- case OP_NCLASS:
- #if defined SUPPORT_UNICODE || PCRE2_CODE_UNIT_WIDTH != 8
- case OP_XCLASS:
- #endif
- compile_iterator_backtrackingpath(common, current);
- break;
- case OP_REF:
- case OP_REFI:
- case OP_DNREF:
- case OP_DNREFI:
- compile_ref_iterator_backtrackingpath(common, current);
- break;
- case OP_RECURSE:
- compile_recurse_backtrackingpath(common, current);
- break;
- case OP_ASSERT:
- case OP_ASSERT_NOT:
- case OP_ASSERTBACK:
- case OP_ASSERTBACK_NOT:
- compile_assert_backtrackingpath(common, current);
- break;
- case OP_ONCE:
- case OP_BRA:
- case OP_CBRA:
- case OP_COND:
- case OP_SBRA:
- case OP_SCBRA:
- case OP_SCOND:
- compile_bracket_backtrackingpath(common, current);
- break;
/* OP_BRAZERO prefixes either a bracket or an assertion; the opcode that
   follows it decides which handler applies. */
- case OP_BRAZERO:
- if (current->cc[1] > OP_ASSERTBACK_NOT)
- compile_bracket_backtrackingpath(common, current);
- else
- compile_assert_backtrackingpath(common, current);
- break;
- case OP_BRAPOS:
- case OP_CBRAPOS:
- case OP_SBRAPOS:
- case OP_SCBRAPOS:
- case OP_BRAPOSZERO:
- compile_bracketpos_backtrackingpath(common, current);
- break;
- case OP_BRAMINZERO:
- compile_braminzero_backtrackingpath(common, current);
- break;
/* Restore the mark_ptr local from the stack. With has_skip_arg the record
   occupies five slots: the mark value sits at STACK(4) and the value
   restored into control_head_ptr at STACK(0); otherwise only one slot is
   used. */
- case OP_MARK:
- OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(common->has_skip_arg ? 4 : 0));
- if (common->has_skip_arg)
- OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
- free_stack(common, common->has_skip_arg ? 5 : 1);
- OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->mark_ptr, TMP1, 0);
- if (common->has_skip_arg)
- OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, TMP2, 0);
- break;
- case OP_THEN:
- case OP_THEN_ARG:
- case OP_PRUNE:
- case OP_PRUNE_ARG:
- case OP_SKIP:
- case OP_SKIP_ARG:
- compile_control_verb_backtrackingpath(common, current);
- break;
/* Backtracking onto COMMIT leaves through the quit path; the return register
   is set to PCRE2_ERROR_NOMATCH first unless a local quit is available. */
- case OP_COMMIT:
- if (!common->local_quit_available)
- OP1(SLJIT_MOV, SLJIT_RETURN_REG, 0, SLJIT_IMM, PCRE2_ERROR_NOMATCH);
- if (common->quit_label == NULL)
- add_jump(compiler, &common->quit, JUMP(SLJIT_JUMP));
- else
- JUMPTO(SLJIT_JUMP, common->quit_label);
- break;
/* These opcodes need no code of their own here; their topbacktracks jumps
   are simply bound to the current position. */
- case OP_CALLOUT:
- case OP_CALLOUT_STR:
- case OP_FAIL:
- case OP_ACCEPT:
- case OP_ASSERT_ACCEPT:
- set_jumps(current->topbacktracks, LABEL());
- break;
- case OP_THEN_TRAP:
- /* A virtual opcode for then traps. */
- compile_then_trap_backtrackingpath(common, current);
- break;
- default:
- SLJIT_UNREACHABLE();
- break;
- }
- current = current->prev;
- }
- common->then_trap = save_then_trap;
- }
/* Compiles the bracket referenced by common->currententry as a separately
   callable routine with two fast-call entry points:
     - currententry->entry_label     : matching path entry;
     - currententry->backtrack_label : backtracking path entry.
   Jumps already queued on entry_calls / backtrack_calls are bound to them.

   The routine returns through sljit_emit_fast_return with TMP1 = 1 when an
   alternative matched and TMP1 = 0 when none did (including the accept_exit
   path).

   common - shared JIT compiler state. On entry then_trap, quit/accept labels
            and quit/accept jump lists are reset for this routine.

   Returns early, leaving the routine incomplete, if the sljit compiler
   reports an error or the read-only jump table cannot be allocated.

   alt_count is kept as a byte offset (it advances by sizeof(sljit_uw) per
   alternative) so that it can index the jump table directly. */
- static SLJIT_INLINE void compile_recurse(compiler_common *common)
- {
- DEFINE_COMPILER;
- PCRE2_SPTR cc = common->start + common->currententry->start;
/* ccbegin skips the opcode and link field, plus the 2-byte immediate that
   every opcode other than OP_BRA carries here. */
- PCRE2_SPTR ccbegin = cc + 1 + LINK_SIZE + (*cc == OP_BRA ? 0 : IMM2_SIZE);
- PCRE2_SPTR ccend = bracketend(cc) - (1 + LINK_SIZE);
- BOOL needs_control_head;
- BOOL has_quit;
- BOOL has_accept;
- int private_data_size = get_recurse_data_length(common, ccbegin, ccend, &needs_control_head, &has_quit, &has_accept);
- int alt_count, alt_max, local_size;
- backtrack_common altbacktrack;
- jump_list *match = NULL;
- sljit_uw *next_update_addr = NULL;
- struct sljit_jump *alt1 = NULL;
- struct sljit_jump *alt2 = NULL;
- struct sljit_jump *accept_exit = NULL;
- struct sljit_label *quit;
- /* Recurse captures then. */
- common->then_trap = NULL;
- SLJIT_ASSERT(*cc == OP_BRA || *cc == OP_CBRA || *cc == OP_CBRAPOS || *cc == OP_SCBRA || *cc == OP_SCBRAPOS);
- alt_max = no_alternatives(cc);
- alt_count = 0;
- /* Matching path. */
- SLJIT_ASSERT(common->currententry->entry_label == NULL && common->recursive_head_ptr != 0);
- common->currententry->entry_label = LABEL();
- set_jumps(common->currententry->entry_calls, common->currententry->entry_label);
/* The return address of the fast call arrives in TMP2. */
- sljit_emit_fast_enter(compiler, TMP2, 0);
- count_match(common);
/* One local slot for the return address, plus one for the saved STR_PTR when
   there is more than one alternative. */
- local_size = (alt_max > 1) ? 2 : 1;
- /* (Reversed) stack layout:
- [private data][return address][optional: str ptr] ... [optional: alternative index][recursive_head_ptr] */
- allocate_stack(common, private_data_size + local_size);
- /* Save return address. */
- OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(local_size - 1), TMP2, 0);
- copy_recurse_data(common, ccbegin, ccend, recurse_copy_from_global, local_size, private_data_size + local_size, has_quit);
- /* This variable is saved and restored all time when we enter or exit from a recursive context. */
- OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->recursive_head_ptr, STACK_TOP, 0);
- if (needs_control_head)
- OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, SLJIT_IMM, 0);
/* STR_PTR is saved so each later alternative can restart from the same
   subject position. */
- if (alt_max > 1)
- OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
- memset(&altbacktrack, 0, sizeof(backtrack_common));
- common->quit_label = NULL;
- common->accept_label = NULL;
- common->quit = NULL;
- common->accept = NULL;
- altbacktrack.cc = ccbegin;
- cc += GET(cc, 1);
/* One iteration per alternative: matching path, success epilogue, then the
   backtracking path of that alternative. */
- while (1)
- {
- altbacktrack.top = NULL;
- altbacktrack.topbacktracks = NULL;
- if (altbacktrack.cc != ccbegin)
- OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
- compile_matchingpath(common, altbacktrack.cc, cc, &altbacktrack);
- if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
- return;
/* Success epilogue of this alternative: reserve the trailing slot(s), load
   the recursive head into TMP2, record which alternative matched and join
   the common `match` exit. */
- allocate_stack(common, (alt_max > 1 || has_accept) ? 2 : 1);
- OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), common->recursive_head_ptr);
- if (alt_max > 1 || has_accept)
- OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_IMM, alt_count);
- add_jump(compiler, &match, JUMP(SLJIT_JUMP));
- if (alt_count == 0)
- {
- /* Backtracking path entry. */
- SLJIT_ASSERT(common->currententry->backtrack_label == NULL);
- common->currententry->backtrack_label = LABEL();
- set_jumps(common->currententry->backtrack_calls, common->currententry->backtrack_label);
- sljit_emit_fast_enter(compiler, TMP1, 0);
/* A stored index of alt_max * sizeof(sljit_sw) is the value tested for the
   accept exit, handled after the loop. */
- if (has_accept)
- accept_exit = CMP(SLJIT_EQUAL, SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_IMM, alt_max * sizeof (sljit_sw));
- OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
- /* Save return address. */
- OP1(SLJIT_MOV, SLJIT_MEM1(TMP2), STACK(local_size - 1), TMP1, 0);
- copy_recurse_data(common, ccbegin, ccend, recurse_swap_global, local_size, private_data_size + local_size, has_quit);
- if (alt_max > 1)
- {
/* TMP1 receives the stored alternative index and selects which
   alternative's backtracking code to resume. */
- OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(1));
- free_stack(common, 2);
- if (alt_max > 4)
- {
- /* Table jump if alt_max is greater than 4. */
- next_update_addr = allocate_read_only_data(common, alt_max * sizeof(sljit_uw));
- if (SLJIT_UNLIKELY(next_update_addr == NULL))
- return;
- sljit_emit_ijump(compiler, SLJIT_JUMP, SLJIT_MEM1(TMP1), (sljit_sw)next_update_addr);
- add_label_addr(common, next_update_addr++);
- }
- else
- {
/* Up to four alternatives: a short chain of comparisons on TMP1 replaces the
   table. alt1 / alt2 are bound as the later alternatives are compiled. */
- if (alt_max == 4)
- alt2 = CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 2 * sizeof(sljit_uw));
- alt1 = CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, sizeof(sljit_uw));
- }
- }
- else
- free_stack(common, has_accept ? 2 : 1);
- }
- else if (alt_max > 4)
- add_label_addr(common, next_update_addr++);
- else
- {
- if (alt_count != 2 * sizeof(sljit_uw))
- {
- JUMPHERE(alt1);
- if (alt_max == 3 && alt_count == sizeof(sljit_uw))
- alt2 = CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 2 * sizeof(sljit_uw));
- }
- else
- {
- JUMPHERE(alt2);
- if (alt_max == 4)
- alt1 = CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 3 * sizeof(sljit_uw));
- }
- }
- alt_count += sizeof(sljit_uw);
- compile_backtrackingpath(common, altbacktrack.top);
- if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
- return;
- set_jumps(altbacktrack.topbacktracks, LABEL());
- if (*cc != OP_ALT)
- break;
- altbacktrack.cc = cc + 1 + LINK_SIZE;
- cc += GET(cc, 1);
- }
- /* No alternative is matched. */
- quit = LABEL();
- copy_recurse_data(common, ccbegin, ccend, recurse_copy_private_to_global, local_size, private_data_size + local_size, has_quit);
- OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), STACK(local_size - 1));
- free_stack(common, private_data_size + local_size);
- OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0);
- sljit_emit_fast_return(compiler, TMP2, 0);
/* Quit requests raised inside the routine: rewind STACK_TOP to the recursive
   head, copy the shared data back and leave through the failure exit above. */
- if (common->quit != NULL)
- {
- SLJIT_ASSERT(has_quit);
- set_jumps(common->quit, LABEL());
- OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), common->recursive_head_ptr);
- copy_recurse_data(common, ccbegin, ccend, recurse_copy_shared_to_global, local_size, private_data_size + local_size, has_quit);
- JUMPTO(SLJIT_JUMP, quit);
- }
/* Target of the accept_exit test made at the backtracking entry: the routine
   returns with TMP1 = 0 without resuming any alternative. */
- if (has_accept)
- {
- JUMPHERE(accept_exit);
- free_stack(common, 2);
- /* Save return address. */
- OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(local_size - 1), TMP1, 0);
- copy_recurse_data(common, ccbegin, ccend, recurse_copy_kept_shared_to_global, local_size, private_data_size + local_size, has_quit);
- OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), STACK(local_size - 1));
- free_stack(common, private_data_size + local_size);
- OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0);
- sljit_emit_fast_return(compiler, TMP2, 0);
- }
/* Accept jumps raised inside the routine: rewind to the recursive head,
   store the final alt_count as the alternative index and fall into the
   `match` exit below.
   NOTE(review): this stored value is presumably the one the accept_exit test
   compares against (alt_max * sizeof(sljit_sw)) - confirm that
   no_alternatives() equals the number of loop iterations. */
- if (common->accept != NULL)
- {
- SLJIT_ASSERT(has_accept);
- set_jumps(common->accept, LABEL());
- OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), common->recursive_head_ptr);
- OP1(SLJIT_MOV, TMP2, 0, STACK_TOP, 0);
- allocate_stack(common, 2);
- OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_IMM, alt_count);
- }
/* Common success exit: TMP2 (the recursive head) is stored at STACK(0), the
   data is swapped, the return address is fetched relative to TMP2 and the
   routine returns with TMP1 = 1. */
- set_jumps(match, LABEL());
- OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP2, 0);
- copy_recurse_data(common, ccbegin, ccend, recurse_swap_global, local_size, private_data_size + local_size, has_quit);
- OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP2), STACK(local_size - 1));
- OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 1);
- sljit_emit_fast_return(compiler, TMP2, 0);
- }
/* COMPILE_BACKTRACKINGPATH and CURRENT_AS are helper macros used by the
   backtracking-path functions above; undefining them here keeps them out of
   the code that follows. */
- #undef COMPILE_BACKTRACKINGPATH
- #undef CURRENT_AS
- static int jit_compile(pcre2_code *code, sljit_u32 mode)
- {
- pcre2_real_code *re = (pcre2_real_code *)code;
- struct sljit_compiler *compiler;
- backtrack_common rootbacktrack;
- compiler_common common_data;
- compiler_common *common = &common_data;
- const sljit_u8 *tables = re->tables;
- void *allocator_data = &re->memctl;
- int private_data_size;
- PCRE2_SPTR ccend;
- executable_functions *functions;
- void *executable_func;
- sljit_uw executable_size;
- sljit_uw total_length;
- label_addr_list *label_addr;
- struct sljit_label *mainloop_label = NULL;
- struct sljit_label *continue_match_label;
- struct sljit_label *empty_match_found_label = NULL;
- struct sljit_label *empty_match_backtrack_label = NULL;
- struct sljit_label *reset_match_label;
- struct sljit_label *quit_label;
- struct sljit_jump *jump;
- struct sljit_jump *minlength_check_failed = NULL;
- struct sljit_jump *reqbyte_notfound = NULL;
- struct sljit_jump *empty_match = NULL;
- struct sljit_jump *end_anchor_failed = NULL;
- SLJIT_ASSERT(tables);
- memset(&rootbacktrack, 0, sizeof(backtrack_common));
- memset(common, 0, sizeof(compiler_common));
- common->re = re;
- common->name_table = (PCRE2_SPTR)((uint8_t *)re + sizeof(pcre2_real_code));
- rootbacktrack.cc = common->name_table + re->name_count * re->name_entry_size;
- common->start = rootbacktrack.cc;
- common->read_only_data_head = NULL;
- common->fcc = tables + fcc_offset;
- common->lcc = (sljit_sw)(tables + lcc_offset);
- common->mode = mode;
- common->might_be_empty = re->minlength == 0;
- common->nltype = NLTYPE_FIXED;
- switch(re->newline_convention)
- {
- case PCRE2_NEWLINE_CR: common->newline = CHAR_CR; break;
- case PCRE2_NEWLINE_LF: common->newline = CHAR_NL; break;
- case PCRE2_NEWLINE_CRLF: common->newline = (CHAR_CR << 8) | CHAR_NL; break;
- case PCRE2_NEWLINE_ANY: common->newline = (CHAR_CR << 8) | CHAR_NL; common->nltype = NLTYPE_ANY; break;
- case PCRE2_NEWLINE_ANYCRLF: common->newline = (CHAR_CR << 8) | CHAR_NL; common->nltype = NLTYPE_ANYCRLF; break;
- default: return PCRE2_ERROR_INTERNAL;
- }
- common->nlmax = READ_CHAR_MAX;
- common->nlmin = 0;
- if (re->bsr_convention == PCRE2_BSR_UNICODE)
- common->bsr_nltype = NLTYPE_ANY;
- else if (re->bsr_convention == PCRE2_BSR_ANYCRLF)
- common->bsr_nltype = NLTYPE_ANYCRLF;
- else
- {
- #ifdef BSR_ANYCRLF
- common->bsr_nltype = NLTYPE_ANYCRLF;
- #else
- common->bsr_nltype = NLTYPE_ANY;
- #endif
- }
- common->bsr_nlmax = READ_CHAR_MAX;
- common->bsr_nlmin = 0;
- common->endonly = (re->overall_options & PCRE2_DOLLAR_ENDONLY) != 0;
- common->ctypes = (sljit_sw)(tables + ctypes_offset);
- common->name_count = re->name_count;
- common->name_entry_size = re->name_entry_size;
- common->unset_backref = (re->overall_options & PCRE2_MATCH_UNSET_BACKREF) != 0;
- common->alt_circumflex = (re->overall_options & PCRE2_ALT_CIRCUMFLEX) != 0;
- #ifdef SUPPORT_UNICODE
- /* PCRE_UTF[16|32] have the same value as PCRE_UTF8. */
- common->utf = (re->overall_options & PCRE2_UTF) != 0;
- common->use_ucp = (re->overall_options & PCRE2_UCP) != 0;
- if (common->utf)
- {
- if (common->nltype == NLTYPE_ANY)
- common->nlmax = 0x2029;
- else if (common->nltype == NLTYPE_ANYCRLF)
- common->nlmax = (CHAR_CR > CHAR_NL) ? CHAR_CR : CHAR_NL;
- else
- {
- /* We only care about the first newline character. */
- common->nlmax = common->newline & 0xff;
- }
- if (common->nltype == NLTYPE_FIXED)
- common->nlmin = common->newline & 0xff;
- else
- common->nlmin = (CHAR_CR < CHAR_NL) ? CHAR_CR : CHAR_NL;
- if (common->bsr_nltype == NLTYPE_ANY)
- common->bsr_nlmax = 0x2029;
- else
- common->bsr_nlmax = (CHAR_CR > CHAR_NL) ? CHAR_CR : CHAR_NL;
- common->bsr_nlmin = (CHAR_CR < CHAR_NL) ? CHAR_CR : CHAR_NL;
- }
- #endif /* SUPPORT_UNICODE */
- ccend = bracketend(common->start);
- /* Calculate the local space size on the stack. */
- common->ovector_start = LIMIT_MATCH + sizeof(sljit_sw);
- common->optimized_cbracket = (sljit_u8 *)SLJIT_MALLOC(re->top_bracket + 1, allocator_data);
- if (!common->optimized_cbracket)
- return PCRE2_ERROR_NOMEMORY;
- #if defined DEBUG_FORCE_UNOPTIMIZED_CBRAS && DEBUG_FORCE_UNOPTIMIZED_CBRAS == 1
- memset(common->optimized_cbracket, 0, re->top_bracket + 1);
- #else
- memset(common->optimized_cbracket, 1, re->top_bracket + 1);
- #endif
- SLJIT_ASSERT(*common->start == OP_BRA && ccend[-(1 + LINK_SIZE)] == OP_KET);
- #if defined DEBUG_FORCE_UNOPTIMIZED_CBRAS && DEBUG_FORCE_UNOPTIMIZED_CBRAS == 2
- common->capture_last_ptr = common->ovector_start;
- common->ovector_start += sizeof(sljit_sw);
- #endif
- if (!check_opcode_types(common, common->start, ccend))
- {
- SLJIT_FREE(common->optimized_cbracket, allocator_data);
- return PCRE2_ERROR_NOMEMORY;
- }
- /* Checking flags and updating ovector_start. */
- if (mode == PCRE2_JIT_COMPLETE && (re->flags & PCRE2_LASTSET) != 0 && (re->overall_options & PCRE2_NO_START_OPTIMIZE) == 0)
- {
- common->req_char_ptr = common->ovector_start;
- common->ovector_start += sizeof(sljit_sw);
- }
- if (mode != PCRE2_JIT_COMPLETE)
- {
- common->start_used_ptr = common->ovector_start;
- common->ovector_start += sizeof(sljit_sw);
- if (mode == PCRE2_JIT_PARTIAL_SOFT)
- {
- common->hit_start = common->ovector_start;
- common->ovector_start += sizeof(sljit_sw);
- }
- }
- if ((re->overall_options & (PCRE2_FIRSTLINE | PCRE2_USE_OFFSET_LIMIT)) != 0)
- {
- common->match_end_ptr = common->ovector_start;
- common->ovector_start += sizeof(sljit_sw);
- }
- #if defined DEBUG_FORCE_CONTROL_HEAD && DEBUG_FORCE_CONTROL_HEAD
- common->control_head_ptr = 1;
- #endif
- if (common->control_head_ptr != 0)
- {
- common->control_head_ptr = common->ovector_start;
- common->ovector_start += sizeof(sljit_sw);
- }
- if (common->has_set_som)
- {
- /* Saving the real start pointer is necessary. */
- common->start_ptr = common->ovector_start;
- common->ovector_start += sizeof(sljit_sw);
- }
- /* Aligning ovector to even number of sljit words. */
- if ((common->ovector_start & sizeof(sljit_sw)) != 0)
- common->ovector_start += sizeof(sljit_sw);
- if (common->start_ptr == 0)
- common->start_ptr = OVECTOR(0);
- /* Capturing brackets cannot be optimized if callouts are allowed. */
- if (common->capture_last_ptr != 0)
- memset(common->optimized_cbracket, 0, re->top_bracket + 1);
- SLJIT_ASSERT(!(common->req_char_ptr != 0 && common->start_used_ptr != 0));
- common->cbra_ptr = OVECTOR_START + (re->top_bracket + 1) * 2 * sizeof(sljit_sw);
- total_length = ccend - common->start;
- common->private_data_ptrs = (sljit_s32 *)SLJIT_MALLOC(total_length * (sizeof(sljit_s32) + (common->has_then ? 1 : 0)), allocator_data);
- if (!common->private_data_ptrs)
- {
- SLJIT_FREE(common->optimized_cbracket, allocator_data);
- return PCRE2_ERROR_NOMEMORY;
- }
- memset(common->private_data_ptrs, 0, total_length * sizeof(sljit_s32));
- private_data_size = common->cbra_ptr + (re->top_bracket + 1) * sizeof(sljit_sw);
- set_private_data_ptrs(common, &private_data_size, ccend);
- if ((re->overall_options & PCRE2_ANCHORED) == 0 && (re->overall_options & PCRE2_NO_START_OPTIMIZE) == 0)
- {
- if (!detect_fast_forward_skip(common, &private_data_size) && !common->has_skip_in_assert_back)
- detect_fast_fail(common, common->start, &private_data_size, 4);
- }
- SLJIT_ASSERT(common->fast_fail_start_ptr <= common->fast_fail_end_ptr);
- if (private_data_size > SLJIT_MAX_LOCAL_SIZE)
- {
- SLJIT_FREE(common->private_data_ptrs, allocator_data);
- SLJIT_FREE(common->optimized_cbracket, allocator_data);
- return PCRE2_ERROR_NOMEMORY;
- }
- if (common->has_then)
- {
- common->then_offsets = (sljit_u8 *)(common->private_data_ptrs + total_length);
- memset(common->then_offsets, 0, total_length);
- set_then_offsets(common, common->start, NULL);
- }
- compiler = sljit_create_compiler(allocator_data);
- if (!compiler)
- {
- SLJIT_FREE(common->optimized_cbracket, allocator_data);
- SLJIT_FREE(common->private_data_ptrs, allocator_data);
- return PCRE2_ERROR_NOMEMORY;
- }
- common->compiler = compiler;
- /* Main pcre_jit_exec entry. */
- sljit_emit_enter(compiler, 0, SLJIT_ARG1(SW), 5, 5, 0, 0, private_data_size);
- /* Register init. */
- reset_ovector(common, (re->top_bracket + 1) * 2);
- if (common->req_char_ptr != 0)
- OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->req_char_ptr, SLJIT_R0, 0);
- OP1(SLJIT_MOV, ARGUMENTS, 0, SLJIT_S0, 0);
- OP1(SLJIT_MOV, TMP1, 0, SLJIT_S0, 0);
- OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, str));
- OP1(SLJIT_MOV, STR_END, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, end));
- OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, stack));
- OP1(SLJIT_MOV_U32, TMP1, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, limit_match));
- OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(struct sljit_stack, end));
- OP1(SLJIT_MOV, STACK_LIMIT, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(struct sljit_stack, start));
- OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
- OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), LIMIT_MATCH, TMP1, 0);
- if (common->fast_fail_start_ptr < common->fast_fail_end_ptr)
- reset_fast_fail(common);
- if (mode == PCRE2_JIT_PARTIAL_SOFT)
- OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->hit_start, SLJIT_IMM, -1);
- if (common->mark_ptr != 0)
- OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->mark_ptr, SLJIT_IMM, 0);
- if (common->control_head_ptr != 0)
- OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, SLJIT_IMM, 0);
- /* Main part of the matching */
- if ((re->overall_options & PCRE2_ANCHORED) == 0)
- {
- mainloop_label = mainloop_entry(common);
- continue_match_label = LABEL();
- /* Forward search if possible. */
- if ((re->overall_options & PCRE2_NO_START_OPTIMIZE) == 0)
- {
- if (mode == PCRE2_JIT_COMPLETE && fast_forward_first_n_chars(common))
- ;
- else if ((re->flags & PCRE2_FIRSTSET) != 0)
- fast_forward_first_char(common);
- else if ((re->flags & PCRE2_STARTLINE) != 0)
- fast_forward_newline(common);
- else if ((re->flags & PCRE2_FIRSTMAPSET) != 0)
- fast_forward_start_bits(common);
- }
- }
- else
- continue_match_label = LABEL();
- if (mode == PCRE2_JIT_COMPLETE && re->minlength > 0 && (re->overall_options & PCRE2_NO_START_OPTIMIZE) == 0)
- {
- OP1(SLJIT_MOV, SLJIT_RETURN_REG, 0, SLJIT_IMM, PCRE2_ERROR_NOMATCH);
- OP2(SLJIT_ADD, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(re->minlength));
- minlength_check_failed = CMP(SLJIT_GREATER, TMP2, 0, STR_END, 0);
- }
- if (common->req_char_ptr != 0)
- reqbyte_notfound = search_requested_char(common, (PCRE2_UCHAR)(re->last_codeunit), (re->flags & PCRE2_LASTCASELESS) != 0, (re->flags & PCRE2_FIRSTSET) != 0);
- /* Store the current STR_PTR in OVECTOR(0). */
- OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(0), STR_PTR, 0);
- /* Copy the limit of allowed recursions. */
- OP1(SLJIT_MOV, COUNT_MATCH, 0, SLJIT_MEM1(SLJIT_SP), LIMIT_MATCH);
- if (common->capture_last_ptr != 0)
- OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->capture_last_ptr, SLJIT_IMM, 0);
- if (common->fast_forward_bc_ptr != NULL)
- OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), PRIVATE_DATA(common->fast_forward_bc_ptr + 1), STR_PTR, 0);
- if (common->start_ptr != OVECTOR(0))
- OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->start_ptr, STR_PTR, 0);
- /* Copy the beginning of the string. */
- if (mode == PCRE2_JIT_PARTIAL_SOFT)
- {
- jump = CMP(SLJIT_NOT_EQUAL, SLJIT_MEM1(SLJIT_SP), common->hit_start, SLJIT_IMM, -1);
- OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->start_used_ptr, STR_PTR, 0);
- JUMPHERE(jump);
- }
- else if (mode == PCRE2_JIT_PARTIAL_HARD)
- OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->start_used_ptr, STR_PTR, 0);
- compile_matchingpath(common, common->start, ccend, &rootbacktrack);
- if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
- {
- sljit_free_compiler(compiler);
- SLJIT_FREE(common->optimized_cbracket, allocator_data);
- SLJIT_FREE(common->private_data_ptrs, allocator_data);
- PRIV(jit_free_rodata)(common->read_only_data_head, compiler->allocator_data);
- return PCRE2_ERROR_NOMEMORY;
- }
- if ((re->overall_options & PCRE2_ENDANCHORED) != 0)
- end_anchor_failed = CMP(SLJIT_NOT_EQUAL, STR_PTR, 0, STR_END, 0);
- if (common->might_be_empty)
- {
- empty_match = CMP(SLJIT_EQUAL, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(0));
- empty_match_found_label = LABEL();
- }
- common->accept_label = LABEL();
- if (common->accept != NULL)
- set_jumps(common->accept, common->accept_label);
- /* This means we have a match. Update the ovector. */
- copy_ovector(common, re->top_bracket + 1);
- common->quit_label = common->abort_label = LABEL();
- if (common->quit != NULL)
- set_jumps(common->quit, common->quit_label);
- if (common->abort != NULL)
- set_jumps(common->abort, common->abort_label);
- if (minlength_check_failed != NULL)
- SET_LABEL(minlength_check_failed, common->abort_label);
- sljit_emit_return(compiler, SLJIT_MOV, SLJIT_RETURN_REG, 0);
- if (common->failed_match != NULL)
- {
- SLJIT_ASSERT(common->mode == PCRE2_JIT_COMPLETE);
- set_jumps(common->failed_match, LABEL());
- OP1(SLJIT_MOV, SLJIT_RETURN_REG, 0, SLJIT_IMM, PCRE2_ERROR_NOMATCH);
- JUMPTO(SLJIT_JUMP, common->abort_label);
- }
- if ((re->overall_options & PCRE2_ENDANCHORED) != 0)
- JUMPHERE(end_anchor_failed);
- if (mode != PCRE2_JIT_COMPLETE)
- {
- common->partialmatchlabel = LABEL();
- set_jumps(common->partialmatch, common->partialmatchlabel);
- return_with_partial_match(common, common->quit_label);
- }
- if (common->might_be_empty)
- empty_match_backtrack_label = LABEL();
- compile_backtrackingpath(common, rootbacktrack.top);
- if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
- {
- sljit_free_compiler(compiler);
- SLJIT_FREE(common->optimized_cbracket, allocator_data);
- SLJIT_FREE(common->private_data_ptrs, allocator_data);
- PRIV(jit_free_rodata)(common->read_only_data_head, compiler->allocator_data);
- return PCRE2_ERROR_NOMEMORY;
- }
- SLJIT_ASSERT(rootbacktrack.prev == NULL);
- reset_match_label = LABEL();
- if (mode == PCRE2_JIT_PARTIAL_SOFT)
- {
- /* Update hit_start only in the first time. */
- jump = CMP(SLJIT_NOT_EQUAL, SLJIT_MEM1(SLJIT_SP), common->hit_start, SLJIT_IMM, 0);
- OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->start_ptr);
- OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->start_used_ptr, SLJIT_IMM, -1);
- OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->hit_start, TMP1, 0);
- JUMPHERE(jump);
- }
- /* Check we have remaining characters. */
- if ((re->overall_options & PCRE2_ANCHORED) == 0 && common->match_end_ptr != 0)
- {
- OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->match_end_ptr);
- }
- OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP),
- (common->fast_forward_bc_ptr != NULL) ? (PRIVATE_DATA(common->fast_forward_bc_ptr + 1)) : common->start_ptr);
- if ((re->overall_options & PCRE2_ANCHORED) == 0)
- {
- if (common->ff_newline_shortcut != NULL)
- {
- /* There cannot be more newlines if PCRE2_FIRSTLINE is set. */
- if ((re->overall_options & PCRE2_FIRSTLINE) == 0)
- {
- if (common->match_end_ptr != 0)
- {
- OP1(SLJIT_MOV, TMP3, 0, STR_END, 0);
- OP1(SLJIT_MOV, STR_END, 0, TMP1, 0);
- CMPTO(SLJIT_LESS, STR_PTR, 0, TMP1, 0, common->ff_newline_shortcut);
- OP1(SLJIT_MOV, STR_END, 0, TMP3, 0);
- }
- else
- CMPTO(SLJIT_LESS, STR_PTR, 0, STR_END, 0, common->ff_newline_shortcut);
- }
- }
- else
- CMPTO(SLJIT_LESS, STR_PTR, 0, (common->match_end_ptr == 0) ? STR_END : TMP1, 0, mainloop_label);
- }
- /* No more remaining characters. */
- if (reqbyte_notfound != NULL)
- JUMPHERE(reqbyte_notfound);
- if (mode == PCRE2_JIT_PARTIAL_SOFT)
- CMPTO(SLJIT_NOT_EQUAL, SLJIT_MEM1(SLJIT_SP), common->hit_start, SLJIT_IMM, -1, common->partialmatchlabel);
- OP1(SLJIT_MOV, SLJIT_RETURN_REG, 0, SLJIT_IMM, PCRE2_ERROR_NOMATCH);
- JUMPTO(SLJIT_JUMP, common->quit_label);
- flush_stubs(common);
- if (common->might_be_empty)
- {
- JUMPHERE(empty_match);
- OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
- OP1(SLJIT_MOV_U32, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, options));
- OP2(SLJIT_AND | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP2, 0, SLJIT_IMM, PCRE2_NOTEMPTY);
- JUMPTO(SLJIT_NOT_ZERO, empty_match_backtrack_label);
- OP2(SLJIT_AND | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP2, 0, SLJIT_IMM, PCRE2_NOTEMPTY_ATSTART);
- JUMPTO(SLJIT_ZERO, empty_match_found_label);
- OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, str));
- CMPTO(SLJIT_NOT_EQUAL, TMP2, 0, STR_PTR, 0, empty_match_found_label);
- JUMPTO(SLJIT_JUMP, empty_match_backtrack_label);
- }
- common->fast_forward_bc_ptr = NULL;
- common->fast_fail_start_ptr = 0;
- common->fast_fail_end_ptr = 0;
- common->currententry = common->entries;
- common->local_quit_available = TRUE;
- quit_label = common->quit_label;
- while (common->currententry != NULL)
- {
- /* Might add new entries. */
- compile_recurse(common);
- if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
- {
- sljit_free_compiler(compiler);
- SLJIT_FREE(common->optimized_cbracket, allocator_data);
- SLJIT_FREE(common->private_data_ptrs, allocator_data);
- PRIV(jit_free_rodata)(common->read_only_data_head, compiler->allocator_data);
- return PCRE2_ERROR_NOMEMORY;
- }
- flush_stubs(common);
- common->currententry = common->currententry->next;
- }
- common->local_quit_available = FALSE;
- common->quit_label = quit_label;
- /* Allocating stack, returns with PCRE_ERROR_JIT_STACKLIMIT if fails. */
- /* This is a (really) rare case. */
- set_jumps(common->stackalloc, LABEL());
- /* RETURN_ADDR is not a saved register. */
- sljit_emit_fast_enter(compiler, SLJIT_MEM1(SLJIT_SP), LOCALS0);
- SLJIT_ASSERT(TMP1 == SLJIT_R0 && STR_PTR == SLJIT_R1);
- OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), LOCALS1, STR_PTR, 0);
- OP1(SLJIT_MOV, SLJIT_R0, 0, ARGUMENTS, 0);
- OP2(SLJIT_SUB, SLJIT_R1, 0, STACK_LIMIT, 0, SLJIT_IMM, STACK_GROWTH_RATE);
- OP1(SLJIT_MOV, SLJIT_R0, 0, SLJIT_MEM1(SLJIT_R0), SLJIT_OFFSETOF(jit_arguments, stack));
- OP1(SLJIT_MOV, STACK_LIMIT, 0, TMP2, 0);
- sljit_emit_icall(compiler, SLJIT_CALL, SLJIT_RET(SW) | SLJIT_ARG1(SW) | SLJIT_ARG2(SW), SLJIT_IMM, SLJIT_FUNC_OFFSET(sljit_stack_resize));
- jump = CMP(SLJIT_EQUAL, SLJIT_RETURN_REG, 0, SLJIT_IMM, 0);
- OP1(SLJIT_MOV, TMP2, 0, STACK_LIMIT, 0);
- OP1(SLJIT_MOV, STACK_LIMIT, 0, SLJIT_RETURN_REG, 0);
- OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), LOCALS0);
- OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP), LOCALS1);
- sljit_emit_fast_return(compiler, TMP1, 0);
- /* Allocation failed. */
- JUMPHERE(jump);
- /* We break the return address cache here, but this is a really rare case. */
- OP1(SLJIT_MOV, SLJIT_RETURN_REG, 0, SLJIT_IMM, PCRE2_ERROR_JIT_STACKLIMIT);
- JUMPTO(SLJIT_JUMP, common->quit_label);
- /* Call limit reached. */
- set_jumps(common->calllimit, LABEL());
- OP1(SLJIT_MOV, SLJIT_RETURN_REG, 0, SLJIT_IMM, PCRE2_ERROR_MATCHLIMIT);
- JUMPTO(SLJIT_JUMP, common->quit_label);
- if (common->revertframes != NULL)
- {
- set_jumps(common->revertframes, LABEL());
- do_revertframes(common);
- }
- if (common->wordboundary != NULL)
- {
- set_jumps(common->wordboundary, LABEL());
- check_wordboundary(common);
- }
- if (common->anynewline != NULL)
- {
- set_jumps(common->anynewline, LABEL());
- check_anynewline(common);
- }
- if (common->hspace != NULL)
- {
- set_jumps(common->hspace, LABEL());
- check_hspace(common);
- }
- if (common->vspace != NULL)
- {
- set_jumps(common->vspace, LABEL());
- check_vspace(common);
- }
- if (common->casefulcmp != NULL)
- {
- set_jumps(common->casefulcmp, LABEL());
- do_casefulcmp(common);
- }
- if (common->caselesscmp != NULL)
- {
- set_jumps(common->caselesscmp, LABEL());
- do_caselesscmp(common);
- }
- if (common->reset_match != NULL)
- {
- set_jumps(common->reset_match, LABEL());
- do_reset_match(common, (re->top_bracket + 1) * 2);
- CMPTO(SLJIT_GREATER, STR_PTR, 0, TMP1, 0, continue_match_label);
- OP1(SLJIT_MOV, STR_PTR, 0, TMP1, 0);
- JUMPTO(SLJIT_JUMP, reset_match_label);
- }
- #ifdef SUPPORT_UNICODE
- #if PCRE2_CODE_UNIT_WIDTH == 8
- if (common->utfreadchar != NULL)
- {
- set_jumps(common->utfreadchar, LABEL());
- do_utfreadchar(common);
- }
- if (common->utfreadchar16 != NULL)
- {
- set_jumps(common->utfreadchar16, LABEL());
- do_utfreadchar16(common);
- }
- if (common->utfreadtype8 != NULL)
- {
- set_jumps(common->utfreadtype8, LABEL());
- do_utfreadtype8(common);
- }
- #endif /* PCRE2_CODE_UNIT_WIDTH == 8 */
- if (common->getucd != NULL)
- {
- set_jumps(common->getucd, LABEL());
- do_getucd(common);
- }
- #endif /* SUPPORT_UNICODE */
- SLJIT_FREE(common->optimized_cbracket, allocator_data);
- SLJIT_FREE(common->private_data_ptrs, allocator_data);
- executable_func = sljit_generate_code(compiler);
- executable_size = sljit_get_generated_code_size(compiler);
- label_addr = common->label_addrs;
- while (label_addr != NULL)
- {
- *label_addr->update_addr = sljit_get_label_addr(label_addr->label);
- label_addr = label_addr->next;
- }
- sljit_free_compiler(compiler);
- if (executable_func == NULL)
- {
- PRIV(jit_free_rodata)(common->read_only_data_head, compiler->allocator_data);
- return PCRE2_ERROR_NOMEMORY;
- }
- /* Reuse the function descriptor if possible. */
- if (re->executable_jit != NULL)
- functions = (executable_functions *)re->executable_jit;
- else
- {
- functions = SLJIT_MALLOC(sizeof(executable_functions), allocator_data);
- if (functions == NULL)
- {
- /* This case is highly unlikely since we just recently
- freed a lot of memory. Not impossible though. */
- sljit_free_code(executable_func);
- PRIV(jit_free_rodata)(common->read_only_data_head, compiler->allocator_data);
- return PCRE2_ERROR_NOMEMORY;
- }
- memset(functions, 0, sizeof(executable_functions));
- functions->top_bracket = re->top_bracket + 1;
- functions->limit_match = re->limit_match;
- re->executable_jit = functions;
- }
- /* Turn mode into an index. */
- if (mode == PCRE2_JIT_COMPLETE)
- mode = 0;
- else
- mode = (mode == PCRE2_JIT_PARTIAL_SOFT) ? 1 : 2;
- SLJIT_ASSERT(mode < JIT_NUMBER_OF_COMPILE_MODES);
- functions->executable_funcs[mode] = executable_func;
- functions->read_only_data_heads[mode] = common->read_only_data_head;
- functions->executable_sizes[mode] = executable_size;
- return 0;
- }
- #endif
- /*************************************************
- * JIT compile a Regular Expression *
- *************************************************/
- /* This function uses JIT to convert a previously-compiled pattern into machine
- code.
- Arguments:
- code a compiled pattern
- options JIT option bits
- Returns: 0: success or (*NOJIT) was used
- <0: an error code
- */
- #define PUBLIC_JIT_COMPILE_OPTIONS \
- (PCRE2_JIT_COMPLETE|PCRE2_JIT_PARTIAL_SOFT|PCRE2_JIT_PARTIAL_HARD)
- PCRE2_EXP_DEFN int PCRE2_CALL_CONVENTION
- pcre2_jit_compile(pcre2_code *code, uint32_t options)
- {
- #ifndef SUPPORT_JIT
- (void)code;
- (void)options;
- return PCRE2_ERROR_JIT_BADOPTION;
- #else /* SUPPORT_JIT */
- pcre2_real_code *re = (pcre2_real_code *)code;
- executable_functions *functions;
- int result;
- if (code == NULL)
- return PCRE2_ERROR_NULL;
- if ((options & ~PUBLIC_JIT_COMPILE_OPTIONS) != 0)
- return PCRE2_ERROR_JIT_BADOPTION;
- if ((re->flags & PCRE2_NOJIT) != 0) return 0;
- functions = (executable_functions *)re->executable_jit;
- if ((options & PCRE2_JIT_COMPLETE) != 0 && (functions == NULL
- || functions->executable_funcs[0] == NULL)) {
- result = jit_compile(code, PCRE2_JIT_COMPLETE);
- if (result != 0)
- return result;
- }
- if ((options & PCRE2_JIT_PARTIAL_SOFT) != 0 && (functions == NULL
- || functions->executable_funcs[1] == NULL)) {
- result = jit_compile(code, PCRE2_JIT_PARTIAL_SOFT);
- if (result != 0)
- return result;
- }
- if ((options & PCRE2_JIT_PARTIAL_HARD) != 0 && (functions == NULL
- || functions->executable_funcs[2] == NULL)) {
- result = jit_compile(code, PCRE2_JIT_PARTIAL_HARD);
- if (result != 0)
- return result;
- }
- return 0;
- #endif /* SUPPORT_JIT */
- }
- /* JIT compiler uses an all-in-one approach. This improves security,
- since the code generator functions are not exported. */
- #define INCLUDED_FROM_PCRE2_JIT_COMPILE
- #include "pcre2_jit_match.c"
- #include "pcre2_jit_misc.c"
- /* End of pcre2_jit_compile.c */
|