/* File: pcre2_match.c */
  /*************************************************
  *      Perl-Compatible Regular Expressions       *
  *************************************************/

  /* PCRE is a library of functions to support regular expressions whose syntax
  and semantics are as close as possible to those of the Perl 5 language.

                         Written by Philip Hazel
       Original API code Copyright (c) 1997-2012 University of Cambridge
            New API code Copyright (c) 2015-2018 University of Cambridge

  -----------------------------------------------------------------------------
  Redistribution and use in source and binary forms, with or without
  modification, are permitted provided that the following conditions are met:

      * Redistributions of source code must retain the above copyright notice,
        this list of conditions and the following disclaimer.

      * Redistributions in binary form must reproduce the above copyright
        notice, this list of conditions and the following disclaimer in the
        documentation and/or other materials provided with the distribution.

      * Neither the name of the University of Cambridge nor the names of its
        contributors may be used to endorse or promote products derived from
        this software without specific prior written permission.

  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
  AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
  LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
  CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
  SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
  INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
  CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
  ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
  POSSIBILITY OF SUCH DAMAGE.
  -----------------------------------------------------------------------------
  */
  #ifdef HAVE_CONFIG_H
  #include "config.h"
  #endif

  /* Debugging switches. Uncomment any of the #defines below to compile in the
  corresponding debugging code. */

  //#define DEBUG_FRAMES_DISPLAY
  //#define DEBUG_SHOW_OPS
  //#define DEBUG_SHOW_RMATCH

  /* <stdarg.h> is pulled in only for the frame-display switch. The test below
  must use exactly the same spelling as the switch above
  (DEBUG_FRAMES_DISPLAY, with an S); a mismatched name would silently skip the
  include whenever the switch is turned on. */

  #ifdef DEBUG_FRAMES_DISPLAY
  #include <stdarg.h>
  #endif
  /* These defines identify the name of the block containing "static"
  information, and the names of fields within it. NOTE(review): presumably
  they are consumed by macros from the internal header that is included
  immediately afterwards - confirm there. */

  #define NLBLOCK mb              /* Block containing newline information */
  #define PSSTART start_subject   /* Field containing processed string start */
  #define PSEND   end_subject     /* Field containing processed string end */
  #include "pcre2_internal.h"
  /* Marker for an unset recursion value. It is deliberately bigger than the
  maximum group number, so it cannot be mistaken for a real group. */

  #define RECURSE_UNSET 0xffffffffu
  /* Masks for identifying the public options that are permitted at match
  time. PUBLIC_JIT_MATCH_OPTIONS lists the same options as
  PUBLIC_MATCH_OPTIONS except PCRE2_ANCHORED, PCRE2_ENDANCHORED and
  PCRE2_NO_JIT. */

  #define PUBLIC_MATCH_OPTIONS \
    (PCRE2_ANCHORED|PCRE2_ENDANCHORED|PCRE2_NOTBOL|PCRE2_NOTEOL|PCRE2_NOTEMPTY| \
     PCRE2_NOTEMPTY_ATSTART|PCRE2_NO_UTF_CHECK|PCRE2_PARTIAL_HARD| \
     PCRE2_PARTIAL_SOFT|PCRE2_NO_JIT)

  #define PUBLIC_JIT_MATCH_OPTIONS \
    (PCRE2_NO_UTF_CHECK|PCRE2_NOTBOL|PCRE2_NOTEOL|PCRE2_NOTEMPTY| \
     PCRE2_NOTEMPTY_ATSTART|PCRE2_PARTIAL_SOFT|PCRE2_PARTIAL_HARD)
  /* Non-error returns from and within the match() function. Error returns are
  externally defined PCRE2_ERROR_xxx codes, which are all negative. */

  #define MATCH_MATCH        1
  #define MATCH_NOMATCH      0

  /* Special internal returns used in the match() function. They are made
  sufficiently negative to avoid the external error codes. */

  #define MATCH_ACCEPT       (-999)
  #define MATCH_KETRPOS      (-998)

  /* The next 5 must be kept together and in sequence so that a test that
  checks for any one of them can use a range; MATCH_BACKTRACK_MIN and
  MATCH_BACKTRACK_MAX name the two ends of that range. */

  #define MATCH_COMMIT       (-997)
  #define MATCH_PRUNE        (-996)
  #define MATCH_SKIP         (-995)
  #define MATCH_SKIP_ARG     (-994)
  #define MATCH_THEN         (-993)
  #define MATCH_BACKTRACK_MAX MATCH_THEN
  #define MATCH_BACKTRACK_MIN MATCH_COMMIT
  /* Group frame type values. Zero means the frame is not a group frame. The
  lower 16 bits are used for data (e.g. the capture number). Group frames are
  used for most groups so that information about the start is easily available
  at the end without having to scan back through intermediate frames
  (backtrack points).

  The values are identifiers held in the upper 16 bits, not independent bit
  flags: GF_CONDASSERT is numerically GF_CAPTURE|GF_NOCAPTURE, so compare the
  result of GF_IDMASK() for equality rather than testing single bits. */

  #define GF_CAPTURE     0x00010000u
  #define GF_NOCAPTURE   0x00020000u
  #define GF_CONDASSERT  0x00030000u
  #define GF_RECURSE     0x00040000u

  /* Masks for the identity and data parts of the group frame type. */

  #define GF_IDMASK(a)   ((a) & 0xffff0000u)
  #define GF_DATAMASK(a) ((a) & 0x0000ffffu)
  /* Repetition types */

  enum { REPTYPE_MIN, REPTYPE_MAX, REPTYPE_POS };

  /* Min and max values for the common repeats; a maximum of UINT32_MAX =>
  infinity. All three tables below list the repeat kinds in the same order:
  star, plus, query (each as a maximizing/minimizing pair), the range pair,
  then the possessive forms. NOTE(review): presumably they are indexed by a
  repeat opcode offset from the first of these - confirm at the use sites. */

  static const uint32_t rep_min[] = {
    0, 0,       /* * and *? */
    1, 1,       /* + and +? */
    0, 0,       /* ? and ?? */
    0, 0,       /* dummy placefillers for OP_CR[MIN]RANGE */
    0, 1, 0 };  /* OP_CRPOS{STAR, PLUS, QUERY} */

  static const uint32_t rep_max[] = {
    UINT32_MAX, UINT32_MAX,      /* * and *? */
    UINT32_MAX, UINT32_MAX,      /* + and +? */
    1, 1,                        /* ? and ?? */
    0, 0,                        /* dummy placefillers for OP_CR[MIN]RANGE */
    UINT32_MAX, UINT32_MAX, 1 }; /* OP_CRPOS{STAR, PLUS, QUERY} */

  /* Repetition types - must include OP_CRPOSRANGE (not needed above), which is
  why this table has one more entry than rep_min and rep_max. */

  static const uint32_t rep_typ[] = {
    REPTYPE_MAX, REPTYPE_MIN,    /* * and *? */
    REPTYPE_MAX, REPTYPE_MIN,    /* + and +? */
    REPTYPE_MAX, REPTYPE_MIN,    /* ? and ?? */
    REPTYPE_MAX, REPTYPE_MIN,    /* OP_CRRANGE and OP_CRMINRANGE */
    REPTYPE_POS, REPTYPE_POS,    /* OP_CRPOSSTAR, OP_CRPOSPLUS */
    REPTYPE_POS, REPTYPE_POS };  /* OP_CRPOSQUERY, OP_CRPOSRANGE */
  111. /* Numbers for RMATCH calls at backtracking points. When these lists are
  112. changed, the code at RETURN_SWITCH below must be updated in sync. */
  113. enum { RM1=1, RM2, RM3, RM4, RM5, RM6, RM7, RM8, RM9, RM10,
  114. RM11, RM12, RM13, RM14, RM15, RM16, RM17, RM18, RM19, RM20,
  115. RM21, RM22, RM23, RM24, RM25, RM26, RM27, RM28, RM29, RM30,
  116. RM31, RM32, RM33, RM34, RM35 };
  117. #ifdef SUPPORT_WIDE_CHARS
  118. enum { RM100=100, RM101 };
  119. #endif
  120. #ifdef SUPPORT_UNICODE
  121. enum { RM200=200, RM201, RM202, RM203, RM204, RM205, RM206, RM207,
  122. RM208, RM209, RM210, RM211, RM212, RM213, RM214, RM215,
  123. RM216, RM217, RM218, RM219, RM220, RM221, RM222 };
  124. #endif
  125. /* Define short names for general fields in the current backtrack frame, which
  126. is always pointed to by the F variable. Occasional references to fields in
  127. other frames are written out explicitly. There are also some fields in the
  128. current frame whose names start with "temp" that are used for short-term,
  129. localised backtracking memory. These are #defined with Lxxx names at the point
  130. of use and undefined afterwards. */
  131. #define Fback_frame F->back_frame
  132. #define Fcapture_last F->capture_last
  133. #define Fcurrent_recurse F->current_recurse
  134. #define Fecode F->ecode
  135. #define Feptr F->eptr
  136. #define Fgroup_frame_type F->group_frame_type
  137. #define Flast_group_offset F->last_group_offset
  138. #define Flength F->length
  139. #define Fmark F->mark
  140. #define Frdepth F->rdepth
  141. #define Fstart_match F->start_match
  142. #define Foffset_top F->offset_top
  143. #define Foccu F->occu
  144. #define Fop F->op
  145. #define Fovector F->ovector
  146. #define Freturn_id F->return_id
  147. #ifdef DEBUG_FRAMES_DISPLAY
  148. /*************************************************
  149. * Display current frames and contents *
  150. *************************************************/
  151. /* This debugging function displays the current set of frames and their
  152. contents. It is not called automatically from anywhere, the intention being
  153. that calls can be inserted where necessary when debugging frame-related
  154. problems.
  155. Arguments:
  156. f the file to write to
  157. F the current top frame
  158. P a previous frame of interest
  159. frame_size the frame size
  160. mb points to the match block
  161. s identification text
  162. Returns: nothing
  163. */
  164. static void
  165. display_frames(FILE *f, heapframe *F, heapframe *P, PCRE2_SIZE frame_size,
  166. match_block *mb, const char *s, ...)
  167. {
  168. uint32_t i;
  169. heapframe *Q;
  170. va_list ap;
  171. va_start(ap, s);
  172. fprintf(f, "FRAMES ");
  173. vfprintf(f, s, ap);
  174. va_end(ap);
  175. if (P != NULL) fprintf(f, " P=%lu",
  176. ((char *)P - (char *)(mb->match_frames))/frame_size);
  177. fprintf(f, "\n");
  178. for (i = 0, Q = mb->match_frames;
  179. Q <= F;
  180. i++, Q = (heapframe *)((char *)Q + frame_size))
  181. {
  182. fprintf(f, "Frame %d type=%x subj=%lu code=%d back=%lu id=%d",
  183. i, Q->group_frame_type, Q->eptr - mb->start_subject, *(Q->ecode),
  184. Q->back_frame, Q->return_id);
  185. if (Q->last_group_offset == PCRE2_UNSET)
  186. fprintf(f, " lgoffset=unset\n");
  187. else
  188. fprintf(f, " lgoffset=%lu\n", Q->last_group_offset/frame_size);
  189. }
  190. }
  191. #endif
  192. /*************************************************
  193. * Process a callout *
  194. *************************************************/
  195. /* This function is called for all callouts, whether "standalone" or at the
  196. start of a conditional group. Feptr will be pointing to either OP_CALLOUT or
  197. OP_CALLOUT_STR. A callout block is allocated in pcre2_match() and initialized
  198. with fixed values.
  199. Arguments:
  200. F points to the current backtracking frame
  201. mb points to the match block
  202. lengthptr where to return the length of the callout item
  203. Returns: the return from the callout
  204. or 0 if no callout function exists
  205. */
  206. static int
  207. do_callout(heapframe *F, match_block *mb, PCRE2_SIZE *lengthptr)
  208. {
  209. int rc;
  210. PCRE2_SIZE save0, save1;
  211. PCRE2_SIZE *callout_ovector;
  212. pcre2_callout_block *cb;
  213. *lengthptr = (*Fecode == OP_CALLOUT)?
  214. PRIV(OP_lengths)[OP_CALLOUT] : GET(Fecode, 1 + 2*LINK_SIZE);
  215. if (mb->callout == NULL) return 0; /* No callout function provided */
  216. /* The original matching code (pre 10.30) worked directly with the ovector
  217. passed by the user, and this was passed to callouts. Now that the working
  218. ovector is in the backtracking frame, it no longer needs to reserve space for
  219. the overall match offsets (which would waste space in the frame). For backward
  220. compatibility, however, we pass capture_top and offset_vector to the callout as
  221. if for the extended ovector, and we ensure that the first two slots are unset
  222. by preserving and restoring their current contents. Picky compilers complain if
  223. references such as Fovector[-2] are use directly, so we set up a separate
  224. pointer. */
  225. callout_ovector = (PCRE2_SIZE *)(Fovector) - 2;
  226. /* The cb->version, cb->subject, cb->subject_length, and cb->start_match fields
  227. are set externally. The first 3 never change; the last is updated for each
  228. bumpalong. */
  229. cb = mb->cb;
  230. cb->capture_top = (uint32_t)Foffset_top/2 + 1;
  231. cb->capture_last = Fcapture_last;
  232. cb->offset_vector = callout_ovector;
  233. cb->mark = mb->nomatch_mark;
  234. cb->current_position = (PCRE2_SIZE)(Feptr - mb->start_subject);
  235. cb->pattern_position = GET(Fecode, 1);
  236. cb->next_item_length = GET(Fecode, 1 + LINK_SIZE);
  237. if (*Fecode == OP_CALLOUT) /* Numerical callout */
  238. {
  239. cb->callout_number = Fecode[1 + 2*LINK_SIZE];
  240. cb->callout_string_offset = 0;
  241. cb->callout_string = NULL;
  242. cb->callout_string_length = 0;
  243. }
  244. else /* String callout */
  245. {
  246. cb->callout_number = 0;
  247. cb->callout_string_offset = GET(Fecode, 1 + 3*LINK_SIZE);
  248. cb->callout_string = Fecode + (1 + 4*LINK_SIZE) + 1;
  249. cb->callout_string_length =
  250. *lengthptr - (1 + 4*LINK_SIZE) - 2;
  251. }
  252. save0 = callout_ovector[0];
  253. save1 = callout_ovector[1];
  254. callout_ovector[0] = callout_ovector[1] = PCRE2_UNSET;
  255. rc = mb->callout(cb, mb->callout_data);
  256. callout_ovector[0] = save0;
  257. callout_ovector[1] = save1;
  258. cb->callout_flags = 0;
  259. return rc;
  260. }
  261. /*************************************************
  262. * Match a back-reference *
  263. *************************************************/
  264. /* This function is called only when it is known that the offset lies within
  265. the offsets that have so far been used in the match. Note that in caseless
  266. UTF-8 mode, the number of subject bytes matched may be different to the number
  267. of reference bytes. (In theory this could also happen in UTF-16 mode, but it
  268. seems unlikely.)
  269. Arguments:
  270. offset index into the offset vector
  271. caseless TRUE if caseless
  272. F the current backtracking frame pointer
  273. mb points to match block
  274. lengthptr pointer for returning the length matched
  275. Returns: = 0 sucessful match; number of code units matched is set
  276. < 0 no match
  277. > 0 partial match
  278. */
  279. static int
  280. match_ref(PCRE2_SIZE offset, BOOL caseless, heapframe *F, match_block *mb,
  281. PCRE2_SIZE *lengthptr)
  282. {
  283. PCRE2_SPTR p;
  284. PCRE2_SIZE length;
  285. PCRE2_SPTR eptr;
  286. PCRE2_SPTR eptr_start;
  287. /* Deal with an unset group. The default is no match, but there is an option to
  288. match an empty string. */
  289. if (offset >= Foffset_top || Fovector[offset] == PCRE2_UNSET)
  290. {
  291. if ((mb->poptions & PCRE2_MATCH_UNSET_BACKREF) != 0)
  292. {
  293. *lengthptr = 0;
  294. return 0; /* Match */
  295. }
  296. else return -1; /* No match */
  297. }
  298. /* Separate the caseless and UTF cases for speed. */
  299. eptr = eptr_start = Feptr;
  300. p = mb->start_subject + Fovector[offset];
  301. length = Fovector[offset+1] - Fovector[offset];
  302. if (caseless)
  303. {
  304. #if defined SUPPORT_UNICODE
  305. if ((mb->poptions & PCRE2_UTF) != 0)
  306. {
  307. /* Match characters up to the end of the reference. NOTE: the number of
  308. code units matched may differ, because in UTF-8 there are some characters
  309. whose upper and lower case codes have different numbers of bytes. For
  310. example, U+023A (2 bytes in UTF-8) is the upper case version of U+2C65 (3
  311. bytes in UTF-8); a sequence of 3 of the former uses 6 bytes, as does a
  312. sequence of two of the latter. It is important, therefore, to check the
  313. length along the reference, not along the subject (earlier code did this
  314. wrong). */
  315. PCRE2_SPTR endptr = p + length;
  316. while (p < endptr)
  317. {
  318. uint32_t c, d;
  319. const ucd_record *ur;
  320. if (eptr >= mb->end_subject) return 1; /* Partial match */
  321. GETCHARINC(c, eptr);
  322. GETCHARINC(d, p);
  323. ur = GET_UCD(d);
  324. if (c != d && c != (uint32_t)((int)d + ur->other_case))
  325. {
  326. const uint32_t *pp = PRIV(ucd_caseless_sets) + ur->caseset;
  327. for (;;)
  328. {
  329. if (c < *pp) return -1; /* No match */
  330. if (c == *pp++) break;
  331. }
  332. }
  333. }
  334. }
  335. else
  336. #endif
  337. /* Not in UTF mode */
  338. {
  339. for (; length > 0; length--)
  340. {
  341. uint32_t cc, cp;
  342. if (eptr >= mb->end_subject) return 1; /* Partial match */
  343. cc = UCHAR21TEST(eptr);
  344. cp = UCHAR21TEST(p);
  345. if (TABLE_GET(cp, mb->lcc, cp) != TABLE_GET(cc, mb->lcc, cc))
  346. return -1; /* No match */
  347. p++;
  348. eptr++;
  349. }
  350. }
  351. }
  352. /* In the caseful case, we can just compare the code units, whether or not we
  353. are in UTF mode. When partial matching, we have to do this unit-by-unit. */
  354. else
  355. {
  356. if (mb->partial != 0)
  357. {
  358. for (; length > 0; length--)
  359. {
  360. if (eptr >= mb->end_subject) return 1; /* Partial match */
  361. if (UCHAR21INCTEST(p) != UCHAR21INCTEST(eptr)) return -1; /* No match */
  362. }
  363. }
  364. /* Not partial matching */
  365. else
  366. {
  367. if ((PCRE2_SIZE)(mb->end_subject - eptr) < length) return 1; /* Partial */
  368. if (memcmp(p, eptr, CU2BYTES(length)) != 0) return -1; /* No match */
  369. eptr += length;
  370. }
  371. }
  372. *lengthptr = eptr - eptr_start;
  373. return 0; /* Match */
  374. }
  375. /******************************************************************************
  376. *******************************************************************************
  377. "Recursion" in the match() function
  378. The original match() function was highly recursive, but this proved to be the
  379. source of a number of problems over the years, mostly because of the relatively
  380. small system stacks that are commonly found. As new features were added to
  381. patterns, various kludges were invented to reduce the amount of stack used,
  382. making the code hard to understand in places.
  383. A version did exist that used individual frames on the heap instead of calling
  384. match() recursively, but this ran substantially slower. The current version is
  385. a refactoring that uses a vector of frames to remember backtracking points.
  386. This runs no slower, and possibly even a bit faster than the original recursive
  387. implementation. An initial vector of size START_FRAMES_SIZE (enough for maybe
  388. 50 frames) is allocated on the system stack. If this is not big enough, the
  389. heap is used for a larger vector.
  390. *******************************************************************************
  391. ******************************************************************************/
  392. /*************************************************
  393. * Macros for the match() function *
  394. *************************************************/
  395. /* These macros pack up tests that are used for partial matching several times
  396. in the code. When partial matching is on (mb->partial != 0), mb->hitend is set
  397. if the pointer is at the end of the subject and also past the earliest inspected
  398. character (i.e. something has been matched, even if not part of the actual
  399. matched string). For hard partial matching (mb->partial > 1) the enclosing
  400. function then returns PCRE2_ERROR_PARTIAL immediately. SCHECK_PARTIAL() leaves
       out the end-of-subject test; it is used when we already know we are past the
       end of the subject. Each macro expands to a bare "if" statement. */
  401. #define CHECK_PARTIAL()\
  402. if (mb->partial != 0 && Feptr >= mb->end_subject && \
  403. Feptr > mb->start_used_ptr) \
  404. { \
  405. mb->hitend = TRUE; \
  406. if (mb->partial > 1) return PCRE2_ERROR_PARTIAL; \
  407. }
  408. #define SCHECK_PARTIAL()\
  409. if (mb->partial != 0 && Feptr > mb->start_used_ptr) \
  410. { \
  411. mb->hitend = TRUE; \
  412. if (mb->partial > 1) return PCRE2_ERROR_PARTIAL; \
  413. }
  414. /* These macros are used to implement backtracking. They simulate a recursive
  415. call to the match() function by means of a local vector of frames which
  416. remember the backtracking points. RMATCH(ra,rb) stores the code pointer ra in
       start_ecode and the identifier rb in the current frame's return_id field,
       jumps to MATCH_RECURSE, and defines the label L_<rb> (for example L_RM1)
       immediately after the jump; presumably that label is where RETURN_SWITCH
       resumes for this identifier - confirm against the RETURN_SWITCH code.
       RRETURN(ra) stores ra in rrc and jumps to RETURN_SWITCH. Both macros expand
       to a brace-enclosed block, not a do/while(0) statement. */
  417. #define RMATCH(ra,rb)\
  418. {\
  419. start_ecode = ra;\
  420. Freturn_id = rb;\
  421. goto MATCH_RECURSE;\
  422. L_##rb:;\
  423. }
  424. #define RRETURN(ra)\
  425. {\
  426. rrc = ra;\
  427. goto RETURN_SWITCH;\
  428. }
  429. /*************************************************
  430. * Match from current position *
  431. *************************************************/
  432. /* This function is called to run one match attempt at a single starting point
  433. in the subject.
  434. Performance note: It might be tempting to extract commonly used fields from the
  435. mb structure (e.g. end_subject) into individual variables to improve
  436. performance. Tests using gcc on a SPARC disproved this; in the first case, it
  437. made performance worse.
  438. Arguments:
  439. start_eptr starting character in subject
  440. start_ecode starting position in compiled code
  441. ovector pointer to the final output vector
  442. oveccount number of pairs in ovector
  443. top_bracket number of capturing parentheses in the pattern
  444. frame_size size of each backtracking frame
  445. mb pointer to "static" variables block
  446. Returns: MATCH_MATCH if matched ) these values are >= 0
  447. MATCH_NOMATCH if failed to match )
  448. negative MATCH_xxx value for PRUNE, SKIP, etc
  449. negative PCRE2_ERROR_xxx value if aborted by an error condition
  450. (e.g. stopped by repeated call or depth limit)
  451. */
  452. static int
  453. match(PCRE2_SPTR start_eptr, PCRE2_SPTR start_ecode, PCRE2_SIZE *ovector,
  454. uint16_t oveccount, uint16_t top_bracket, PCRE2_SIZE frame_size,
  455. match_block *mb)
  456. {
  457. /* Frame-handling variables */
  458. heapframe *F; /* Current frame pointer */
  459. heapframe *N = NULL; /* Temporary frame pointers */
  460. heapframe *P = NULL;
  461. heapframe *assert_accept_frame; /* For passing back the frame with captures */
  462. PCRE2_SIZE frame_copy_size; /* Amount to copy when creating a new frame */
  463. /* Local variables that do not need to be preserved over calls to RMATCH(). */
  464. PCRE2_SPTR bracode; /* Temp pointer to start of group */
  465. PCRE2_SIZE offset; /* Used for group offsets */
  466. PCRE2_SIZE length; /* Used for various length calculations */
  467. int rrc; /* Return from functions & backtracking "recursions" */
  468. #ifdef SUPPORT_UNICODE
  469. int proptype; /* Type of character property */
  470. #endif
  471. uint32_t i; /* Used for local loops */
  472. uint32_t fc; /* Character values */
  473. uint32_t number; /* Used for group and other numbers */
  474. uint32_t reptype = 0; /* Type of repetition (0 to avoid compiler warning) */
  475. uint32_t group_frame_type; /* Specifies type for new group frames */
  476. BOOL condition; /* Used in conditional groups */
  477. BOOL cur_is_word; /* Used in "word" tests */
  478. BOOL prev_is_word; /* Used in "word" tests */
  479. /* UTF flag */
  480. #ifdef SUPPORT_UNICODE
  481. BOOL utf = (mb->poptions & PCRE2_UTF) != 0;
  482. #else
  483. BOOL utf = FALSE;
  484. #endif
  485. /* This is the length of the last part of a backtracking frame that must be
  486. copied when a new frame is created. */
  487. frame_copy_size = frame_size - offsetof(heapframe, eptr);
  488. /* Set up the first current frame at the start of the vector, and initialize
  489. fields that are not reset for new frames. */
  490. F = mb->match_frames;
  491. Frdepth = 0; /* "Recursion" depth */
  492. Fcapture_last = 0; /* Number of most recent capture */
  493. Fcurrent_recurse = RECURSE_UNSET; /* Not pattern recursing. */
  494. Fstart_match = Feptr = start_eptr; /* Current data pointer and start match */
  495. Fmark = NULL; /* Most recent mark */
  496. Foffset_top = 0; /* End of captures within the frame */
  497. Flast_group_offset = PCRE2_UNSET; /* Saved frame of most recent group */
  498. group_frame_type = 0; /* Not a start of group frame */
  499. goto NEW_FRAME; /* Start processing with this frame */
  500. /* Come back here when we want to create a new frame for remembering a
  501. backtracking point. */
  502. MATCH_RECURSE:
  503. /* Set up a new backtracking frame. If the vector is full, get a new one
  504. on the heap, doubling the size, but constrained by the heap limit. */
  505. N = (heapframe *)((char *)F + frame_size);
  506. if (N >= mb->match_frames_top)
  507. {
  508. PCRE2_SIZE newsize = mb->frame_vector_size * 2;
  509. heapframe *new;
  510. if ((newsize / 1024) > mb->heap_limit)
  511. {
  512. PCRE2_SIZE maxsize = ((mb->heap_limit * 1024)/frame_size) * frame_size;
  513. if (mb->frame_vector_size >= maxsize) return PCRE2_ERROR_HEAPLIMIT;
  514. newsize = maxsize;
  515. }
  516. new = mb->memctl.malloc(newsize, mb->memctl.memory_data);
  517. if (new == NULL) return PCRE2_ERROR_NOMEMORY;
  518. memcpy(new, mb->match_frames, mb->frame_vector_size);
  519. F = (heapframe *)((char *)new + ((char *)F - (char *)mb->match_frames));
  520. N = (heapframe *)((char *)F + frame_size);
  521. if (mb->match_frames != mb->stack_frames)
  522. mb->memctl.free(mb->match_frames, mb->memctl.memory_data);
  523. mb->match_frames = new;
  524. mb->match_frames_top = (heapframe *)((char *)mb->match_frames + newsize);
  525. mb->frame_vector_size = newsize;
  526. }
  527. #ifdef DEBUG_SHOW_RMATCH
  528. fprintf(stderr, "++ RMATCH %2d frame=%d", Freturn_id, Frdepth + 1);
  529. if (group_frame_type != 0)
  530. {
  531. fprintf(stderr, " type=%x ", group_frame_type);
  532. switch (GF_IDMASK(group_frame_type))
  533. {
  534. case GF_CAPTURE:
  535. fprintf(stderr, "capture=%d", GF_DATAMASK(group_frame_type));
  536. break;
  537. case GF_NOCAPTURE:
  538. fprintf(stderr, "nocapture op=%d", GF_DATAMASK(group_frame_type));
  539. break;
  540. case GF_CONDASSERT:
  541. fprintf(stderr, "condassert op=%d", GF_DATAMASK(group_frame_type));
  542. break;
  543. case GF_RECURSE:
  544. fprintf(stderr, "recurse=%d", GF_DATAMASK(group_frame_type));
  545. break;
  546. default:
  547. fprintf(stderr, "*** unknown ***");
  548. break;
  549. }
  550. }
  551. fprintf(stderr, "\n");
  552. #endif
  553. /* Copy those fields that must be copied into the new frame, increase the
  554. "recursion" depth (i.e. the new frame's index) and then make the new frame
  555. current. */
  556. memcpy((char *)N + offsetof(heapframe, eptr),
  557. (char *)F + offsetof(heapframe, eptr),
  558. frame_copy_size);
  559. N->rdepth = Frdepth + 1;
  560. F = N;
  561. /* Carry on processing with a new frame. */
  562. NEW_FRAME:
  563. Fgroup_frame_type = group_frame_type;
  564. Fecode = start_ecode; /* Starting code pointer */
  565. Fback_frame = frame_size; /* Default is go back one frame */
  566. /* If this is a special type of group frame, remember its offset for quick
  567. access at the end of the group. If this is a recursion, set a new current
  568. recursion value. */
  569. if (group_frame_type != 0)
  570. {
  571. Flast_group_offset = (char *)F - (char *)mb->match_frames;
  572. if (GF_IDMASK(group_frame_type) == GF_RECURSE)
  573. Fcurrent_recurse = GF_DATAMASK(group_frame_type);
  574. group_frame_type = 0;
  575. }
  576. /* ========================================================================= */
  577. /* This is the main processing loop. First check that we haven't recorded too
  578. many backtracks (search tree is too large), or that we haven't exceeded the
  579. recursive depth limit (used too many backtracking frames). If not, process the
  580. opcodes. */
  581. if (mb->match_call_count++ >= mb->match_limit) return PCRE2_ERROR_MATCHLIMIT;
  582. if (Frdepth >= mb->match_limit_depth) return PCRE2_ERROR_DEPTHLIMIT;
  583. for (;;)
  584. {
  585. #ifdef DEBUG_SHOW_OPS
  586. fprintf(stderr, "++ op=%d\n", *Fecode);
  587. #endif
  588. Fop = (uint8_t)(*Fecode); /* Cast needed for 16-bit and 32-bit modes */
  589. switch(Fop)
  590. {
  591. /* ===================================================================== */
  592. /* Before OP_ACCEPT there may be any number of OP_CLOSE opcodes, to close
  593. any currently open capturing brackets. Unlike reaching the end of a group,
  594. where we know the starting frame is at the top of the chained frames, in
  595. this case we have to search back for the relevant frame in case other types
  596. of group that use chained frames have intervened. Multiple OP_CLOSEs always
  597. come innermost first, which matches the chain order. We can ignore this in
  598. a recursion, because captures are not passed out of recursions. */
  599. case OP_CLOSE:
  600. if (Fcurrent_recurse == RECURSE_UNSET)
  601. {
  602. number = GET2(Fecode, 1);
  603. offset = Flast_group_offset;
  604. for(;;)
  605. {
  606. if (offset == PCRE2_UNSET) return PCRE2_ERROR_INTERNAL;
  607. N = (heapframe *)((char *)mb->match_frames + offset);
  608. P = (heapframe *)((char *)N - frame_size);
  609. if (N->group_frame_type == (GF_CAPTURE | number)) break;
  610. offset = P->last_group_offset;
  611. }
  612. offset = (number << 1) - 2;
  613. Fcapture_last = number;
  614. Fovector[offset] = P->eptr - mb->start_subject;
  615. Fovector[offset+1] = Feptr - mb->start_subject;
  616. if (offset >= Foffset_top) Foffset_top = offset + 2;
  617. }
  618. Fecode += PRIV(OP_lengths)[*Fecode];
  619. break;
  620. /* ===================================================================== */
  621. /* Real or forced end of the pattern, assertion, or recursion. In an
  622. assertion ACCEPT, update the last used pointer and remember the current
  623. frame so that the captures can be fished out of it. */
  624. case OP_ASSERT_ACCEPT:
  625. if (Feptr > mb->last_used_ptr) mb->last_used_ptr = Feptr;
  626. assert_accept_frame = F;
  627. RRETURN(MATCH_ACCEPT);
  628. /* If recursing, we have to find the most recent recursion. */
  629. case OP_ACCEPT:
  630. case OP_END:
  631. /* Handle end of a recursion. */
  632. if (Fcurrent_recurse != RECURSE_UNSET)
  633. {
  634. offset = Flast_group_offset;
  635. for(;;)
  636. {
  637. if (offset == PCRE2_UNSET) return PCRE2_ERROR_INTERNAL;
  638. N = (heapframe *)((char *)mb->match_frames + offset);
  639. P = (heapframe *)((char *)N - frame_size);
  640. if (GF_IDMASK(N->group_frame_type) == GF_RECURSE) break;
  641. offset = P->last_group_offset;
  642. }
  643. /* N is now the frame of the recursion; the previous frame is at the
  644. OP_RECURSE position. Go back there, copying the current subject position
  645. and mark, and move on past the OP_RECURSE. */
  646. P->eptr = Feptr;
  647. P->mark = Fmark;
  648. F = P;
  649. Fecode += 1 + LINK_SIZE;
  650. continue;
  651. }
  652. /* Not a recursion. Fail for an empty string match if either PCRE2_NOTEMPTY
  653. is set, or if PCRE2_NOTEMPTY_ATSTART is set and we have matched at the
  654. start of the subject. In both cases, backtracking will then try other
  655. alternatives, if any. */
  656. if (Feptr == Fstart_match &&
  657. ((mb->moptions & PCRE2_NOTEMPTY) != 0 ||
  658. ((mb->moptions & PCRE2_NOTEMPTY_ATSTART) != 0 &&
  659. Fstart_match == mb->start_subject + mb->start_offset)))
  660. RRETURN(MATCH_NOMATCH);
  661. /* Also fail if PCRE2_ENDANCHORED is set and the end of the match is not
  662. the end of the subject. After (*ACCEPT) we fail the entire match (at this
  663. position) but backtrack on reaching the end of the pattern. */
  664. if (Feptr < mb->end_subject &&
  665. ((mb->moptions | mb->poptions) & PCRE2_ENDANCHORED) != 0)
  666. {
  667. if (Fop == OP_END) RRETURN(MATCH_NOMATCH);
  668. return MATCH_NOMATCH;
  669. }
  670. /* We have a successful match of the whole pattern. Record the result and
  671. then do a direct return from the function. If there is space in the offset
  672. vector, set any pairs that follow the highest-numbered captured string but
  673. are less than the number of capturing groups in the pattern to PCRE2_UNSET.
  674. It is documented that this happens. "Gaps" are set to PCRE2_UNSET
  675. dynamically. It is only those at the end that need setting here. */
  676. mb->end_match_ptr = Feptr; /* Record where we ended */
  677. mb->end_offset_top = Foffset_top; /* and how many extracts were taken */
  678. mb->mark = Fmark; /* and the last success mark */
  679. if (Feptr > mb->last_used_ptr) mb->last_used_ptr = Feptr;
  680. ovector[0] = Fstart_match - mb->start_subject;
  681. ovector[1] = Feptr - mb->start_subject;
  682. /* Set i to the smaller of the sizes of the external and frame ovectors. */
  683. i = 2 * ((top_bracket + 1 > oveccount)? oveccount : top_bracket + 1);
  684. memcpy(ovector + 2, Fovector, (i - 2) * sizeof(PCRE2_SIZE));
  685. while (--i >= Foffset_top + 2) ovector[i] = PCRE2_UNSET;
  686. return MATCH_MATCH; /* Note: NOT RRETURN */
  687. /*===================================================================== */
  688. /* Match any single character type except newline; have to take care with
  689. CRLF newlines and partial matching. */
  690. case OP_ANY:
  691. if (IS_NEWLINE(Feptr)) RRETURN(MATCH_NOMATCH);
  692. if (mb->partial != 0 &&
  693. Feptr == mb->end_subject - 1 &&
  694. NLBLOCK->nltype == NLTYPE_FIXED &&
  695. NLBLOCK->nllen == 2 &&
  696. UCHAR21TEST(Feptr) == NLBLOCK->nl[0])
  697. {
  698. mb->hitend = TRUE;
  699. if (mb->partial > 1) return PCRE2_ERROR_PARTIAL;
  700. }
  701. /* Fall through */
  702. /* Match any single character whatsoever. */
  703. case OP_ALLANY:
  704. if (Feptr >= mb->end_subject) /* DO NOT merge the Feptr++ here; it must */
  705. { /* not be updated before SCHECK_PARTIAL. */
  706. SCHECK_PARTIAL();
  707. RRETURN(MATCH_NOMATCH);
  708. }
  709. Feptr++;
  710. #ifdef SUPPORT_UNICODE
  711. if (utf) ACROSSCHAR(Feptr < mb->end_subject, Feptr, Feptr++);
  712. #endif
  713. Fecode++;
  714. break;
  715. /* ===================================================================== */
  716. /* Match a single code unit, even in UTF mode. This opcode really does
  717. match any code unit, even newline. (It really should be called ANYCODEUNIT,
  718. of course - the byte name is from pre-16 bit days.) */
  719. case OP_ANYBYTE:
  720. if (Feptr >= mb->end_subject) /* DO NOT merge the Feptr++ here; it must */
  721. { /* not be updated before SCHECK_PARTIAL. */
  722. SCHECK_PARTIAL();
  723. RRETURN(MATCH_NOMATCH);
  724. }
  725. Feptr++;
  726. Fecode++;
  727. break;
  728. /* ===================================================================== */
  729. /* Match a single character, casefully */
  730. case OP_CHAR:
  731. #ifdef SUPPORT_UNICODE
  732. if (utf)
  733. {
  734. Flength = 1;
  735. Fecode++;
  736. GETCHARLEN(fc, Fecode, Flength);
  737. if (Flength > (PCRE2_SIZE)(mb->end_subject - Feptr))
  738. {
  739. CHECK_PARTIAL(); /* Not SCHECK_PARTIAL() */
  740. RRETURN(MATCH_NOMATCH);
  741. }
  742. for (; Flength > 0; Flength--)
  743. {
  744. if (*Fecode++ != UCHAR21INC(Feptr)) RRETURN(MATCH_NOMATCH);
  745. }
  746. }
  747. else
  748. #endif
  749. /* Not UTF mode */
  750. {
  751. if (mb->end_subject - Feptr < 1)
  752. {
  753. SCHECK_PARTIAL(); /* This one can use SCHECK_PARTIAL() */
  754. RRETURN(MATCH_NOMATCH);
  755. }
  756. if (Fecode[1] != *Feptr++) RRETURN(MATCH_NOMATCH);
  757. Fecode += 2;
  758. }
  759. break;
  760. /* ===================================================================== */
  761. /* Match a single character, caselessly. If we are at the end of the
  762. subject, give up immediately. We get here only when the pattern character
  763. has at most one other case. Characters with more than two cases are coded
  764. as OP_PROP with the pseudo-property PT_CLIST. */
  765. case OP_CHARI:
  766. if (Feptr >= mb->end_subject)
  767. {
  768. SCHECK_PARTIAL();
  769. RRETURN(MATCH_NOMATCH);
  770. }
  771. #ifdef SUPPORT_UNICODE
  772. if (utf)
  773. {
  774. Flength = 1;
  775. Fecode++;
  776. GETCHARLEN(fc, Fecode, Flength);
  777. /* If the pattern character's value is < 128, we know that its other case
  778. (if any) is also < 128 (and therefore only one code unit long in all
  779. code-unit widths), so we can use the fast lookup table. We checked above
  780. that there is at least one character left in the subject. */
  781. if (fc < 128)
  782. {
  783. uint32_t cc = UCHAR21(Feptr);
  784. if (mb->lcc[fc] != TABLE_GET(cc, mb->lcc, cc)) RRETURN(MATCH_NOMATCH);
  785. Fecode++;
  786. Feptr++;
  787. }
  788. /* Otherwise we must pick up the subject character and use Unicode
  789. property support to test its other case. Note that we cannot use the
  790. value of "Flength" to check for sufficient bytes left, because the other
  791. case of the character may have more or fewer code units. */
  792. else
  793. {
  794. uint32_t dc;
  795. GETCHARINC(dc, Feptr);
  796. Fecode += Flength;
  797. if (dc != fc && dc != UCD_OTHERCASE(fc)) RRETURN(MATCH_NOMATCH);
  798. }
  799. }
  800. else
  801. #endif /* SUPPORT_UNICODE */
  802. /* Not UTF mode; use the table for characters < 256. */
  803. {
  804. if (TABLE_GET(Fecode[1], mb->lcc, Fecode[1])
  805. != TABLE_GET(*Feptr, mb->lcc, *Feptr)) RRETURN(MATCH_NOMATCH);
  806. Feptr++;
  807. Fecode += 2;
  808. }
  809. break;
  810. /* ===================================================================== */
  811. /* Match not a single character. */
  812. case OP_NOT:
  813. case OP_NOTI:
  814. if (Feptr >= mb->end_subject)
  815. {
  816. SCHECK_PARTIAL();
  817. RRETURN(MATCH_NOMATCH);
  818. }
  819. #ifdef SUPPORT_UNICODE
  820. if (utf)
  821. {
  822. uint32_t ch;
  823. Fecode++;
  824. GETCHARINC(ch, Fecode);
  825. GETCHARINC(fc, Feptr);
  826. if (ch == fc)
  827. {
  828. RRETURN(MATCH_NOMATCH); /* Caseful match */
  829. }
  830. else if (Fop == OP_NOTI) /* If caseless */
  831. {
  832. if (ch > 127)
  833. ch = UCD_OTHERCASE(ch);
  834. else
  835. ch = TABLE_GET(ch, mb->fcc, ch);
  836. if (ch == fc) RRETURN(MATCH_NOMATCH);
  837. }
  838. }
  839. else
  840. #endif /* SUPPORT_UNICODE */
  841. {
  842. uint32_t ch = Fecode[1];
  843. fc = *Feptr++;
  844. if (ch == fc || (Fop == OP_NOTI && TABLE_GET(ch, mb->fcc, ch) == fc))
  845. RRETURN(MATCH_NOMATCH);
  846. Fecode += 2;
  847. }
  848. break;
  849. /* ===================================================================== */
  850. /* Match a single character repeatedly. */
  851. #define Loclength F->temp_size
  852. #define Lstart_eptr F->temp_sptr[0]
  853. #define Lcharptr F->temp_sptr[1]
  854. #define Lmin F->temp_32[0]
  855. #define Lmax F->temp_32[1]
  856. #define Lc F->temp_32[2]
  857. #define Loc F->temp_32[3]
  858. case OP_EXACT:
  859. case OP_EXACTI:
  860. Lmin = Lmax = GET2(Fecode, 1);
  861. Fecode += 1 + IMM2_SIZE;
  862. goto REPEATCHAR;
  863. case OP_POSUPTO:
  864. case OP_POSUPTOI:
  865. reptype = REPTYPE_POS;
  866. Lmin = 0;
  867. Lmax = GET2(Fecode, 1);
  868. Fecode += 1 + IMM2_SIZE;
  869. goto REPEATCHAR;
  870. case OP_UPTO:
  871. case OP_UPTOI:
  872. reptype = REPTYPE_MAX;
  873. Lmin = 0;
  874. Lmax = GET2(Fecode, 1);
  875. Fecode += 1 + IMM2_SIZE;
  876. goto REPEATCHAR;
  877. case OP_MINUPTO:
  878. case OP_MINUPTOI:
  879. reptype = REPTYPE_MIN;
  880. Lmin = 0;
  881. Lmax = GET2(Fecode, 1);
  882. Fecode += 1 + IMM2_SIZE;
  883. goto REPEATCHAR;
  884. case OP_POSSTAR:
  885. case OP_POSSTARI:
  886. reptype = REPTYPE_POS;
  887. Lmin = 0;
  888. Lmax = UINT32_MAX;
  889. Fecode++;
  890. goto REPEATCHAR;
  891. case OP_POSPLUS:
  892. case OP_POSPLUSI:
  893. reptype = REPTYPE_POS;
  894. Lmin = 1;
  895. Lmax = UINT32_MAX;
  896. Fecode++;
  897. goto REPEATCHAR;
  898. case OP_POSQUERY:
  899. case OP_POSQUERYI:
  900. reptype = REPTYPE_POS;
  901. Lmin = 0;
  902. Lmax = 1;
  903. Fecode++;
  904. goto REPEATCHAR;
  905. case OP_STAR:
  906. case OP_STARI:
  907. case OP_MINSTAR:
  908. case OP_MINSTARI:
  909. case OP_PLUS:
  910. case OP_PLUSI:
  911. case OP_MINPLUS:
  912. case OP_MINPLUSI:
  913. case OP_QUERY:
  914. case OP_QUERYI:
  915. case OP_MINQUERY:
  916. case OP_MINQUERYI:
  917. fc = *Fecode++ - ((Fop < OP_STARI)? OP_STAR : OP_STARI);
  918. Lmin = rep_min[fc];
  919. Lmax = rep_max[fc];
  920. reptype = rep_typ[fc];
  921. /* Common code for all repeated single-character matches. We first check
  922. for the minimum number of characters. If the minimum equals the maximum, we
  923. are done. Otherwise, if minimizing, check the rest of the pattern for a
  924. match; if there isn't one, advance up to the maximum, one character at a
  925. time.
  926. If maximizing, advance up to the maximum number of matching characters,
  927. until Feptr is past the end of the maximum run. If possessive, we are
  928. then done (no backing up). Otherwise, match at this position; anything
  929. other than no match is immediately returned. For nomatch, back up one
  930. character, unless we are matching \R and the last thing matched was
  931. \r\n, in which case, back up two code units until we reach the first
  932. optional character position.
  933. The various UTF/non-UTF and caseful/caseless cases are handled separately,
  934. for speed. */
  935. REPEATCHAR:
  936. #ifdef SUPPORT_UNICODE
  937. if (utf)
  938. {
  939. Flength = 1;
  940. Lcharptr = Fecode;
  941. GETCHARLEN(fc, Fecode, Flength);
  942. Fecode += Flength;
  943. /* Handle multi-code-unit character matching, caseful and caseless. */
  944. if (Flength > 1)
  945. {
  946. uint32_t othercase;
  947. if (Fop >= OP_STARI && /* Caseless */
  948. (othercase = UCD_OTHERCASE(fc)) != fc)
  949. Loclength = PRIV(ord2utf)(othercase, Foccu);
  950. else Loclength = 0;
  951. for (i = 1; i <= Lmin; i++)
  952. {
  953. if (Feptr <= mb->end_subject - Flength &&
  954. memcmp(Feptr, Lcharptr, CU2BYTES(Flength)) == 0) Feptr += Flength;
  955. else if (Loclength > 0 &&
  956. Feptr <= mb->end_subject - Loclength &&
  957. memcmp(Feptr, Foccu, CU2BYTES(Loclength)) == 0)
  958. Feptr += Loclength;
  959. else
  960. {
  961. CHECK_PARTIAL();
  962. RRETURN(MATCH_NOMATCH);
  963. }
  964. }
  965. if (Lmin == Lmax) continue;
  966. if (reptype == REPTYPE_MIN)
  967. {
  968. for (;;)
  969. {
  970. RMATCH(Fecode, RM202);
  971. if (rrc != MATCH_NOMATCH) RRETURN(rrc);
  972. if (Lmin++ >= Lmax) RRETURN(MATCH_NOMATCH);
  973. if (Feptr <= mb->end_subject - Flength &&
  974. memcmp(Feptr, Lcharptr, CU2BYTES(Flength)) == 0) Feptr += Flength;
  975. else if (Loclength > 0 &&
  976. Feptr <= mb->end_subject - Loclength &&
  977. memcmp(Feptr, Foccu, CU2BYTES(Loclength)) == 0)
  978. Feptr += Loclength;
  979. else
  980. {
  981. CHECK_PARTIAL();
  982. RRETURN(MATCH_NOMATCH);
  983. }
  984. }
  985. /* Control never gets here */
  986. }
  987. else /* Maximize */
  988. {
  989. Lstart_eptr = Feptr;
  990. for (i = Lmin; i < Lmax; i++)
  991. {
  992. if (Feptr <= mb->end_subject - Flength &&
  993. memcmp(Feptr, Lcharptr, CU2BYTES(Flength)) == 0)
  994. Feptr += Flength;
  995. else if (Loclength > 0 &&
  996. Feptr <= mb->end_subject - Loclength &&
  997. memcmp(Feptr, Foccu, CU2BYTES(Loclength)) == 0)
  998. Feptr += Loclength;
  999. else
  1000. {
  1001. CHECK_PARTIAL();
  1002. break;
  1003. }
  1004. }
  1005. /* After \C in UTF mode, Lstart_eptr might be in the middle of a
  1006. Unicode character. Use <= Lstart_eptr to ensure backtracking doesn't
  1007. go too far. */
  1008. if (reptype != REPTYPE_POS) for(;;)
  1009. {
  1010. if (Feptr <= Lstart_eptr) break;
  1011. RMATCH(Fecode, RM203);
  1012. if (rrc != MATCH_NOMATCH) RRETURN(rrc);
  1013. Feptr--;
  1014. BACKCHAR(Feptr);
  1015. }
  1016. }
  1017. break; /* End of repeated wide character handling */
  1018. }
  1019. /* Length of UTF character is 1. Put it into the preserved variable and
  1020. fall through to the non-UTF code. */
  1021. Lc = fc;
  1022. }
  1023. else
  1024. #endif /* SUPPORT_UNICODE */
  1025. /* When not in UTF mode, load a single-code-unit character. Then proceed as
  1026. above. */
  1027. Lc = *Fecode++;
  1028. /* Caseless comparison */
  1029. if (Fop >= OP_STARI)
  1030. {
  1031. #if PCRE2_CODE_UNIT_WIDTH == 8
  1032. /* Lc must be < 128 in UTF-8 mode. */
  1033. Loc = mb->fcc[Lc];
  1034. #else /* 16-bit & 32-bit */
  1035. #ifdef SUPPORT_UNICODE
  1036. if (utf && Lc > 127) Loc = UCD_OTHERCASE(Lc);
  1037. else
  1038. #endif /* SUPPORT_UNICODE */
  1039. Loc = TABLE_GET(Lc, mb->fcc, Lc);
  1040. #endif /* PCRE2_CODE_UNIT_WIDTH == 8 */
  1041. for (i = 1; i <= Lmin; i++)
  1042. {
  1043. uint32_t cc; /* Faster than PCRE2_UCHAR */
  1044. if (Feptr >= mb->end_subject)
  1045. {
  1046. SCHECK_PARTIAL();
  1047. RRETURN(MATCH_NOMATCH);
  1048. }
  1049. cc = UCHAR21TEST(Feptr);
  1050. if (Lc != cc && Loc != cc) RRETURN(MATCH_NOMATCH);
  1051. Feptr++;
  1052. }
  1053. if (Lmin == Lmax) continue;
  1054. if (reptype == REPTYPE_MIN)
  1055. {
  1056. for (;;)
  1057. {
  1058. uint32_t cc; /* Faster than PCRE2_UCHAR */
  1059. RMATCH(Fecode, RM25);
  1060. if (rrc != MATCH_NOMATCH) RRETURN(rrc);
  1061. if (Lmin++ >= Lmax) RRETURN(MATCH_NOMATCH);
  1062. if (Feptr >= mb->end_subject)
  1063. {
  1064. SCHECK_PARTIAL();
  1065. RRETURN(MATCH_NOMATCH);
  1066. }
  1067. cc = UCHAR21TEST(Feptr);
  1068. if (Lc != cc && Loc != cc) RRETURN(MATCH_NOMATCH);
  1069. Feptr++;
  1070. }
  1071. /* Control never gets here */
  1072. }
  1073. else /* Maximize */
  1074. {
  1075. Lstart_eptr = Feptr;
  1076. for (i = Lmin; i < Lmax; i++)
  1077. {
  1078. uint32_t cc; /* Faster than PCRE2_UCHAR */
  1079. if (Feptr >= mb->end_subject)
  1080. {
  1081. SCHECK_PARTIAL();
  1082. break;
  1083. }
  1084. cc = UCHAR21TEST(Feptr);
  1085. if (Lc != cc && Loc != cc) break;
  1086. Feptr++;
  1087. }
  1088. if (reptype != REPTYPE_POS) for (;;)
  1089. {
  1090. if (Feptr == Lstart_eptr) break;
  1091. RMATCH(Fecode, RM26);
  1092. Feptr--;
  1093. if (rrc != MATCH_NOMATCH) RRETURN(rrc);
  1094. }
  1095. }
  1096. }
  1097. /* Caseful comparisons (includes all multi-byte characters) */
  1098. else
  1099. {
  1100. for (i = 1; i <= Lmin; i++)
  1101. {
  1102. if (Feptr >= mb->end_subject)
  1103. {
  1104. SCHECK_PARTIAL();
  1105. RRETURN(MATCH_NOMATCH);
  1106. }
  1107. if (Lc != UCHAR21INCTEST(Feptr)) RRETURN(MATCH_NOMATCH);
  1108. }
  1109. if (Lmin == Lmax) continue;
  1110. if (reptype == REPTYPE_MIN)
  1111. {
  1112. for (;;)
  1113. {
  1114. RMATCH(Fecode, RM27);
  1115. if (rrc != MATCH_NOMATCH) RRETURN(rrc);
  1116. if (Lmin++ >= Lmax) RRETURN(MATCH_NOMATCH);
  1117. if (Feptr >= mb->end_subject)
  1118. {
  1119. SCHECK_PARTIAL();
  1120. RRETURN(MATCH_NOMATCH);
  1121. }
  1122. if (Lc != UCHAR21INCTEST(Feptr)) RRETURN(MATCH_NOMATCH);
  1123. }
  1124. /* Control never gets here */
  1125. }
  1126. else /* Maximize */
  1127. {
  1128. Lstart_eptr = Feptr;
  1129. for (i = Lmin; i < Lmax; i++)
  1130. {
  1131. if (Feptr >= mb->end_subject)
  1132. {
  1133. SCHECK_PARTIAL();
  1134. break;
  1135. }
  1136. if (Lc != UCHAR21TEST(Feptr)) break;
  1137. Feptr++;
  1138. }
  1139. if (reptype != REPTYPE_POS) for (;;)
  1140. {
  1141. if (Feptr <= Lstart_eptr) break;
  1142. RMATCH(Fecode, RM28);
  1143. Feptr--;
  1144. if (rrc != MATCH_NOMATCH) RRETURN(rrc);
  1145. }
  1146. }
  1147. }
  1148. break;
  1149. #undef Loclength
  1150. #undef Lstart_eptr
  1151. #undef Lcharptr
  1152. #undef Lmin
  1153. #undef Lmax
  1154. #undef Lc
  1155. #undef Loc
  1156. /* ===================================================================== */
  1157. /* Match a negated single one-byte character repeatedly. This is almost a
  1158. repeat of the code for a repeated single character, but I haven't found a
  1159. nice way of commoning these up that doesn't require a test of the
  1160. positive/negative option for each character match. Maybe that wouldn't add
  1161. very much to the time taken, but character matching *is* what this is all
  1162. about... */
  1163. #define Lstart_eptr F->temp_sptr[0]
  1164. #define Lmin F->temp_32[0]
  1165. #define Lmax F->temp_32[1]
  1166. #define Lc F->temp_32[2]
  1167. #define Loc F->temp_32[3]
  1168. case OP_NOTEXACT:
  1169. case OP_NOTEXACTI:
  1170. Lmin = Lmax = GET2(Fecode, 1);
  1171. Fecode += 1 + IMM2_SIZE;
  1172. goto REPEATNOTCHAR;
  1173. case OP_NOTUPTO:
  1174. case OP_NOTUPTOI:
  1175. Lmin = 0;
  1176. Lmax = GET2(Fecode, 1);
  1177. reptype = REPTYPE_MAX;
  1178. Fecode += 1 + IMM2_SIZE;
  1179. goto REPEATNOTCHAR;
  1180. case OP_NOTMINUPTO:
  1181. case OP_NOTMINUPTOI:
  1182. Lmin = 0;
  1183. Lmax = GET2(Fecode, 1);
  1184. reptype = REPTYPE_MIN;
  1185. Fecode += 1 + IMM2_SIZE;
  1186. goto REPEATNOTCHAR;
  1187. case OP_NOTPOSSTAR:
  1188. case OP_NOTPOSSTARI:
  1189. reptype = REPTYPE_POS;
  1190. Lmin = 0;
  1191. Lmax = UINT32_MAX;
  1192. Fecode++;
  1193. goto REPEATNOTCHAR;
  1194. case OP_NOTPOSPLUS:
  1195. case OP_NOTPOSPLUSI:
  1196. reptype = REPTYPE_POS;
  1197. Lmin = 1;
  1198. Lmax = UINT32_MAX;
  1199. Fecode++;
  1200. goto REPEATNOTCHAR;
  1201. case OP_NOTPOSQUERY:
  1202. case OP_NOTPOSQUERYI:
  1203. reptype = REPTYPE_POS;
  1204. Lmin = 0;
  1205. Lmax = 1;
  1206. Fecode++;
  1207. goto REPEATNOTCHAR;
  1208. case OP_NOTPOSUPTO:
  1209. case OP_NOTPOSUPTOI:
  1210. reptype = REPTYPE_POS;
  1211. Lmin = 0;
  1212. Lmax = GET2(Fecode, 1);
  1213. Fecode += 1 + IMM2_SIZE;
  1214. goto REPEATNOTCHAR;
  1215. case OP_NOTSTAR:
  1216. case OP_NOTSTARI:
  1217. case OP_NOTMINSTAR:
  1218. case OP_NOTMINSTARI:
  1219. case OP_NOTPLUS:
  1220. case OP_NOTPLUSI:
  1221. case OP_NOTMINPLUS:
  1222. case OP_NOTMINPLUSI:
  1223. case OP_NOTQUERY:
  1224. case OP_NOTQUERYI:
  1225. case OP_NOTMINQUERY:
  1226. case OP_NOTMINQUERYI:
  1227. fc = *Fecode++ - ((Fop >= OP_NOTSTARI)? OP_NOTSTARI: OP_NOTSTAR);
  1228. Lmin = rep_min[fc];
  1229. Lmax = rep_max[fc];
  1230. reptype = rep_typ[fc];
  1231. /* Common code for all repeated single-character non-matches. */
  1232. REPEATNOTCHAR:
  1233. GETCHARINCTEST(Lc, Fecode);
  1234. /* The code is duplicated for the caseless and caseful cases, for speed,
  1235. since matching characters is likely to be quite common. First, ensure the
  1236. minimum number of matches are present. If Lmin = Lmax, we are done.
  1237. Otherwise, if minimizing, keep trying the rest of the expression and
  1238. advancing one matching character if failing, up to the maximum.
  1239. Alternatively, if maximizing, find the maximum number of characters and
  1240. work backwards. */
  1241. if (Fop >= OP_NOTSTARI) /* Caseless */
  1242. {
  1243. #ifdef SUPPORT_UNICODE
  1244. if (utf && Lc > 127)
  1245. Loc = UCD_OTHERCASE(Lc);
  1246. else
  1247. #endif /* SUPPORT_UNICODE */
  1248. Loc = TABLE_GET(Lc, mb->fcc, Lc); /* Other case from table */
  1249. #ifdef SUPPORT_UNICODE
  1250. if (utf)
  1251. {
  1252. uint32_t d;
  1253. for (i = 1; i <= Lmin; i++)
  1254. {
  1255. if (Feptr >= mb->end_subject)
  1256. {
  1257. SCHECK_PARTIAL();
  1258. RRETURN(MATCH_NOMATCH);
  1259. }
  1260. GETCHARINC(d, Feptr);
  1261. if (Lc == d || Loc == d) RRETURN(MATCH_NOMATCH);
  1262. }
  1263. }
  1264. else
  1265. #endif /* SUPPORT_UNICODE */
  1266. /* Not UTF mode */
  1267. {
  1268. for (i = 1; i <= Lmin; i++)
  1269. {
  1270. if (Feptr >= mb->end_subject)
  1271. {
  1272. SCHECK_PARTIAL();
  1273. RRETURN(MATCH_NOMATCH);
  1274. }
  1275. if (Lc == *Feptr || Loc == *Feptr) RRETURN(MATCH_NOMATCH);
  1276. Feptr++;
  1277. }
  1278. }
  1279. if (Lmin == Lmax) continue; /* Finished for exact count */
  1280. if (reptype == REPTYPE_MIN)
  1281. {
  1282. #ifdef SUPPORT_UNICODE
  1283. if (utf)
  1284. {
  1285. uint32_t d;
  1286. for (;;)
  1287. {
  1288. RMATCH(Fecode, RM204);
  1289. if (rrc != MATCH_NOMATCH) RRETURN(rrc);
  1290. if (Lmin++ >= Lmax) RRETURN(MATCH_NOMATCH);
  1291. if (Feptr >= mb->end_subject)
  1292. {
  1293. SCHECK_PARTIAL();
  1294. RRETURN(MATCH_NOMATCH);
  1295. }
  1296. GETCHARINC(d, Feptr);
  1297. if (Lc == d || Loc == d) RRETURN(MATCH_NOMATCH);
  1298. }
  1299. }
  1300. else
  1301. #endif /*SUPPORT_UNICODE */
  1302. /* Not UTF mode */
  1303. {
  1304. for (;;)
  1305. {
  1306. RMATCH(Fecode, RM29);
  1307. if (rrc != MATCH_NOMATCH) RRETURN(rrc);
  1308. if (Lmin++ >= Lmax) RRETURN(MATCH_NOMATCH);
  1309. if (Feptr >= mb->end_subject)
  1310. {
  1311. SCHECK_PARTIAL();
  1312. RRETURN(MATCH_NOMATCH);
  1313. }
  1314. if (Lc == *Feptr || Loc == *Feptr) RRETURN(MATCH_NOMATCH);
  1315. Feptr++;
  1316. }
  1317. }
  1318. /* Control never gets here */
  1319. }
  1320. /* Maximize case */
  1321. else
  1322. {
  1323. Lstart_eptr = Feptr;
  1324. #ifdef SUPPORT_UNICODE
  1325. if (utf)
  1326. {
  1327. uint32_t d;
  1328. for (i = Lmin; i < Lmax; i++)
  1329. {
  1330. int len = 1;
  1331. if (Feptr >= mb->end_subject)
  1332. {
  1333. SCHECK_PARTIAL();
  1334. break;
  1335. }
  1336. GETCHARLEN(d, Feptr, len);
  1337. if (Lc == d || Loc == d) break;
  1338. Feptr += len;
  1339. }
  1340. /* After \C in UTF mode, Lstart_eptr might be in the middle of a
  1341. Unicode character. Use <= Lstart_eptr to ensure backtracking doesn't
  1342. go too far. */
  1343. if (reptype != REPTYPE_POS) for(;;)
  1344. {
  1345. if (Feptr <= Lstart_eptr) break;
  1346. RMATCH(Fecode, RM205);
  1347. if (rrc != MATCH_NOMATCH) RRETURN(rrc);
  1348. Feptr--;
  1349. BACKCHAR(Feptr);
  1350. }
  1351. }
  1352. else
  1353. #endif /* SUPPORT_UNICODE */
  1354. /* Not UTF mode */
  1355. {
  1356. for (i = Lmin; i < Lmax; i++)
  1357. {
  1358. if (Feptr >= mb->end_subject)
  1359. {
  1360. SCHECK_PARTIAL();
  1361. break;
  1362. }
  1363. if (Lc == *Feptr || Loc == *Feptr) break;
  1364. Feptr++;
  1365. }
  1366. if (reptype != REPTYPE_POS) for (;;)
  1367. {
  1368. if (Feptr == Lstart_eptr) break;
  1369. RMATCH(Fecode, RM30);
  1370. if (rrc != MATCH_NOMATCH) RRETURN(rrc);
  1371. Feptr--;
  1372. }
  1373. }
  1374. }
  1375. }
  1376. /* Caseful comparisons */
  1377. else
  1378. {
  1379. #ifdef SUPPORT_UNICODE
  1380. if (utf)
  1381. {
  1382. uint32_t d;
  1383. for (i = 1; i <= Lmin; i++)
  1384. {
  1385. if (Feptr >= mb->end_subject)
  1386. {
  1387. SCHECK_PARTIAL();
  1388. RRETURN(MATCH_NOMATCH);
  1389. }
  1390. GETCHARINC(d, Feptr);
  1391. if (Lc == d) RRETURN(MATCH_NOMATCH);
  1392. }
  1393. }
  1394. else
  1395. #endif
  1396. /* Not UTF mode */
  1397. {
  1398. for (i = 1; i <= Lmin; i++)
  1399. {
  1400. if (Feptr >= mb->end_subject)
  1401. {
  1402. SCHECK_PARTIAL();
  1403. RRETURN(MATCH_NOMATCH);
  1404. }
  1405. if (Lc == *Feptr++) RRETURN(MATCH_NOMATCH);
  1406. }
  1407. }
  1408. if (Lmin == Lmax) continue;
  1409. if (reptype == REPTYPE_MIN)
  1410. {
  1411. #ifdef SUPPORT_UNICODE
  1412. if (utf)
  1413. {
  1414. uint32_t d;
  1415. for (;;)
  1416. {
  1417. RMATCH(Fecode, RM206);
  1418. if (rrc != MATCH_NOMATCH) RRETURN(rrc);
  1419. if (Lmin++ >= Lmax) RRETURN(MATCH_NOMATCH);
  1420. if (Feptr >= mb->end_subject)
  1421. {
  1422. SCHECK_PARTIAL();
  1423. RRETURN(MATCH_NOMATCH);
  1424. }
  1425. GETCHARINC(d, Feptr);
  1426. if (Lc == d) RRETURN(MATCH_NOMATCH);
  1427. }
  1428. }
  1429. else
  1430. #endif
  1431. /* Not UTF mode */
  1432. {
  1433. for (;;)
  1434. {
  1435. RMATCH(Fecode, RM31);
  1436. if (rrc != MATCH_NOMATCH) RRETURN(rrc);
  1437. if (Lmin++ >= Lmax) RRETURN(MATCH_NOMATCH);
  1438. if (Feptr >= mb->end_subject)
  1439. {
  1440. SCHECK_PARTIAL();
  1441. RRETURN(MATCH_NOMATCH);
  1442. }
  1443. if (Lc == *Feptr++) RRETURN(MATCH_NOMATCH);
  1444. }
  1445. }
  1446. /* Control never gets here */
  1447. }
  1448. /* Maximize case */
  1449. else
  1450. {
  1451. Lstart_eptr = Feptr;
  1452. #ifdef SUPPORT_UNICODE
  1453. if (utf)
  1454. {
  1455. uint32_t d;
  1456. for (i = Lmin; i < Lmax; i++)
  1457. {
  1458. int len = 1;
  1459. if (Feptr >= mb->end_subject)
  1460. {
  1461. SCHECK_PARTIAL();
  1462. break;
  1463. }
  1464. GETCHARLEN(d, Feptr, len);
  1465. if (Lc == d) break;
  1466. Feptr += len;
  1467. }
  1468. /* After \C in UTF mode, Lstart_eptr might be in the middle of a
  1469. Unicode character. Use <= Lstart_eptr to ensure backtracking doesn't
  1470. go too far. */
  1471. if (reptype != REPTYPE_POS) for(;;)
  1472. {
  1473. if (Feptr <= Lstart_eptr) break;
  1474. RMATCH(Fecode, RM207);
  1475. if (rrc != MATCH_NOMATCH) RRETURN(rrc);
  1476. Feptr--;
  1477. BACKCHAR(Feptr);
  1478. }
  1479. }
  1480. else
  1481. #endif
  1482. /* Not UTF mode */
  1483. {
  1484. for (i = Lmin; i < Lmax; i++)
  1485. {
  1486. if (Feptr >= mb->end_subject)
  1487. {
  1488. SCHECK_PARTIAL();
  1489. break;
  1490. }
  1491. if (Lc == *Feptr) break;
  1492. Feptr++;
  1493. }
  1494. if (reptype != REPTYPE_POS) for (;;)
  1495. {
  1496. if (Feptr == Lstart_eptr) break;
  1497. RMATCH(Fecode, RM32);
  1498. if (rrc != MATCH_NOMATCH) RRETURN(rrc);
  1499. Feptr--;
  1500. }
  1501. }
  1502. }
  1503. }
  1504. break;
  1505. #undef Lstart_eptr
  1506. #undef Lmin
  1507. #undef Lmax
  1508. #undef Lc
  1509. #undef Loc
  1510. /* ===================================================================== */
  1511. /* Match a bit-mapped character class, possibly repeatedly. These op codes
  1512. are used when all the characters in the class have values in the range
  1513. 0-255, and either the matching is caseful, or the characters are in the
  1514. range 0-127 when UTF processing is enabled. The only difference between
  1515. OP_CLASS and OP_NCLASS occurs when a data character outside the range is
  1516. encountered. */
  1517. #define Lmin F->temp_32[0]
  1518. #define Lmax F->temp_32[1]
  1519. #define Lstart_eptr F->temp_sptr[0]
  1520. #define Lbyte_map_address F->temp_sptr[1]
  1521. #define Lbyte_map ((unsigned char *)Lbyte_map_address)
  1522. case OP_NCLASS:
  1523. case OP_CLASS:
  1524. {
  1525. Lbyte_map_address = Fecode + 1; /* Save for matching */
  1526. Fecode += 1 + (32 / sizeof(PCRE2_UCHAR)); /* Advance past the item */
  1527. /* Look past the end of the item to see if there is repeat information
  1528. following. Then obey similar code to character type repeats. */
  1529. switch (*Fecode)
  1530. {
  1531. case OP_CRSTAR:
  1532. case OP_CRMINSTAR:
  1533. case OP_CRPLUS:
  1534. case OP_CRMINPLUS:
  1535. case OP_CRQUERY:
  1536. case OP_CRMINQUERY:
  1537. case OP_CRPOSSTAR:
  1538. case OP_CRPOSPLUS:
  1539. case OP_CRPOSQUERY:
  1540. fc = *Fecode++ - OP_CRSTAR;
  1541. Lmin = rep_min[fc];
  1542. Lmax = rep_max[fc];
  1543. reptype = rep_typ[fc];
  1544. break;
  1545. case OP_CRRANGE:
  1546. case OP_CRMINRANGE:
  1547. case OP_CRPOSRANGE:
  1548. Lmin = GET2(Fecode, 1);
  1549. Lmax = GET2(Fecode, 1 + IMM2_SIZE);
  1550. if (Lmax == 0) Lmax = UINT32_MAX; /* Max 0 => infinity */
  1551. reptype = rep_typ[*Fecode - OP_CRSTAR];
  1552. Fecode += 1 + 2 * IMM2_SIZE;
  1553. break;
  1554. default: /* No repeat follows */
  1555. Lmin = Lmax = 1;
  1556. break;
  1557. }
  1558. /* First, ensure the minimum number of matches are present. */
  1559. #ifdef SUPPORT_UNICODE
  1560. if (utf)
  1561. {
  1562. for (i = 1; i <= Lmin; i++)
  1563. {
  1564. if (Feptr >= mb->end_subject)
  1565. {
  1566. SCHECK_PARTIAL();
  1567. RRETURN(MATCH_NOMATCH);
  1568. }
  1569. GETCHARINC(fc, Feptr);
  1570. if (fc > 255)
  1571. {
  1572. if (Fop == OP_CLASS) RRETURN(MATCH_NOMATCH);
  1573. }
  1574. else
  1575. if ((Lbyte_map[fc/8] & (1 << (fc&7))) == 0) RRETURN(MATCH_NOMATCH);
  1576. }
  1577. }
  1578. else
  1579. #endif
  1580. /* Not UTF mode */
  1581. {
  1582. for (i = 1; i <= Lmin; i++)
  1583. {
  1584. if (Feptr >= mb->end_subject)
  1585. {
  1586. SCHECK_PARTIAL();
  1587. RRETURN(MATCH_NOMATCH);
  1588. }
  1589. fc = *Feptr++;
  1590. #if PCRE2_CODE_UNIT_WIDTH != 8
  1591. if (fc > 255)
  1592. {
  1593. if (Fop == OP_CLASS) RRETURN(MATCH_NOMATCH);
  1594. }
  1595. else
  1596. #endif
  1597. if ((Lbyte_map[fc/8] & (1 << (fc&7))) == 0) RRETURN(MATCH_NOMATCH);
  1598. }
  1599. }
  1600. /* If Lmax == Lmin we are done. Continue with main loop. */
  1601. if (Lmin == Lmax) continue;
  1602. /* If minimizing, keep testing the rest of the expression and advancing
  1603. the pointer while it matches the class. */
  1604. if (reptype == REPTYPE_MIN)
  1605. {
  1606. #ifdef SUPPORT_UNICODE
  1607. if (utf)
  1608. {
  1609. for (;;)
  1610. {
  1611. RMATCH(Fecode, RM200);
  1612. if (rrc != MATCH_NOMATCH) RRETURN(rrc);
  1613. if (Lmin++ >= Lmax) RRETURN(MATCH_NOMATCH);
  1614. if (Feptr >= mb->end_subject)
  1615. {
  1616. SCHECK_PARTIAL();
  1617. RRETURN(MATCH_NOMATCH);
  1618. }
  1619. GETCHARINC(fc, Feptr);
  1620. if (fc > 255)
  1621. {
  1622. if (Fop == OP_CLASS) RRETURN(MATCH_NOMATCH);
  1623. }
  1624. else
  1625. if ((Lbyte_map[fc/8] & (1 << (fc&7))) == 0) RRETURN(MATCH_NOMATCH);
  1626. }
  1627. }
  1628. else
  1629. #endif
  1630. /* Not UTF mode */
  1631. {
  1632. for (;;)
  1633. {
  1634. RMATCH(Fecode, RM23);
  1635. if (rrc != MATCH_NOMATCH) RRETURN(rrc);
  1636. if (Lmin++ >= Lmax) RRETURN(MATCH_NOMATCH);
  1637. if (Feptr >= mb->end_subject)
  1638. {
  1639. SCHECK_PARTIAL();
  1640. RRETURN(MATCH_NOMATCH);
  1641. }
  1642. fc = *Feptr++;
  1643. #if PCRE2_CODE_UNIT_WIDTH != 8
  1644. if (fc > 255)
  1645. {
  1646. if (Fop == OP_CLASS) RRETURN(MATCH_NOMATCH);
  1647. }
  1648. else
  1649. #endif
  1650. if ((Lbyte_map[fc/8] & (1 << (fc&7))) == 0) RRETURN(MATCH_NOMATCH);
  1651. }
  1652. }
  1653. /* Control never gets here */
  1654. }
  1655. /* If maximizing, find the longest possible run, then work backwards. */
  1656. else
  1657. {
  1658. Lstart_eptr = Feptr;
  1659. #ifdef SUPPORT_UNICODE
  1660. if (utf)
  1661. {
  1662. for (i = Lmin; i < Lmax; i++)
  1663. {
  1664. int len = 1;
  1665. if (Feptr >= mb->end_subject)
  1666. {
  1667. SCHECK_PARTIAL();
  1668. break;
  1669. }
  1670. GETCHARLEN(fc, Feptr, len);
  1671. if (fc > 255)
  1672. {
  1673. if (Fop == OP_CLASS) break;
  1674. }
  1675. else
  1676. if ((Lbyte_map[fc/8] & (1 << (fc&7))) == 0) break;
  1677. Feptr += len;
  1678. }
  1679. if (reptype == REPTYPE_POS) continue; /* No backtracking */
  1680. for (;;)
  1681. {
  1682. RMATCH(Fecode, RM201);
  1683. if (rrc != MATCH_NOMATCH) RRETURN(rrc);
  1684. if (Feptr-- == Lstart_eptr) break; /* Tried at original position */
  1685. BACKCHAR(Feptr);
  1686. }
  1687. }
  1688. else
  1689. #endif
  1690. /* Not UTF mode */
  1691. {
  1692. for (i = Lmin; i < Lmax; i++)
  1693. {
  1694. if (Feptr >= mb->end_subject)
  1695. {
  1696. SCHECK_PARTIAL();
  1697. break;
  1698. }
  1699. fc = *Feptr;
  1700. #if PCRE2_CODE_UNIT_WIDTH != 8
  1701. if (fc > 255)
  1702. {
  1703. if (Fop == OP_CLASS) break;
  1704. }
  1705. else
  1706. #endif
  1707. if ((Lbyte_map[fc/8] & (1 << (fc&7))) == 0) break;
  1708. Feptr++;
  1709. }
  1710. if (reptype == REPTYPE_POS) continue; /* No backtracking */
  1711. while (Feptr >= Lstart_eptr)
  1712. {
  1713. RMATCH(Fecode, RM24);
  1714. if (rrc != MATCH_NOMATCH) RRETURN(rrc);
  1715. Feptr--;
  1716. }
  1717. }
  1718. RRETURN(MATCH_NOMATCH);
  1719. }
  1720. }
  1721. /* Control never gets here */
  1722. #undef Lbyte_map_address
  1723. #undef Lbyte_map
  1724. #undef Lstart_eptr
  1725. #undef Lmin
  1726. #undef Lmax
  1727. /* ===================================================================== */
  1728. /* Match an extended character class. In the 8-bit library, this opcode is
  1729. encountered only when UTF-8 mode is supported. In the 16-bit and
  1730. 32-bit libraries, codepoints greater than 255 may be encountered even when
  1731. UTF is not supported. */
  1732. #define Lstart_eptr F->temp_sptr[0]
  1733. #define Lxclass_data F->temp_sptr[1]
  1734. #define Lmin F->temp_32[0]
  1735. #define Lmax F->temp_32[1]
  1736. #ifdef SUPPORT_WIDE_CHARS
  1737. case OP_XCLASS:
  1738. {
  1739. Lxclass_data = Fecode + 1 + LINK_SIZE; /* Save for matching */
  1740. Fecode += GET(Fecode, 1); /* Advance past the item */
  1741. switch (*Fecode)
  1742. {
  1743. case OP_CRSTAR:
  1744. case OP_CRMINSTAR:
  1745. case OP_CRPLUS:
  1746. case OP_CRMINPLUS:
  1747. case OP_CRQUERY:
  1748. case OP_CRMINQUERY:
  1749. case OP_CRPOSSTAR:
  1750. case OP_CRPOSPLUS:
  1751. case OP_CRPOSQUERY:
  1752. fc = *Fecode++ - OP_CRSTAR;
  1753. Lmin = rep_min[fc];
  1754. Lmax = rep_max[fc];
  1755. reptype = rep_typ[fc];
  1756. break;
  1757. case OP_CRRANGE:
  1758. case OP_CRMINRANGE:
  1759. case OP_CRPOSRANGE:
  1760. Lmin = GET2(Fecode, 1);
  1761. Lmax = GET2(Fecode, 1 + IMM2_SIZE);
  1762. if (Lmax == 0) Lmax = UINT32_MAX; /* Max 0 => infinity */
  1763. reptype = rep_typ[*Fecode - OP_CRSTAR];
  1764. Fecode += 1 + 2 * IMM2_SIZE;
  1765. break;
  1766. default: /* No repeat follows */
  1767. Lmin = Lmax = 1;
  1768. break;
  1769. }
  1770. /* First, ensure the minimum number of matches are present. */
  1771. for (i = 1; i <= Lmin; i++)
  1772. {
  1773. if (Feptr >= mb->end_subject)
  1774. {
  1775. SCHECK_PARTIAL();
  1776. RRETURN(MATCH_NOMATCH);
  1777. }
  1778. GETCHARINCTEST(fc, Feptr);
  1779. if (!PRIV(xclass)(fc, Lxclass_data, utf)) RRETURN(MATCH_NOMATCH);
  1780. }
  1781. /* If Lmax == Lmin we can just continue with the main loop. */
  1782. if (Lmin == Lmax) continue;
  1783. /* If minimizing, keep testing the rest of the expression and advancing
  1784. the pointer while it matches the class. */
  1785. if (reptype == REPTYPE_MIN)
  1786. {
  1787. for (;;)
  1788. {
  1789. RMATCH(Fecode, RM100);
  1790. if (rrc != MATCH_NOMATCH) RRETURN(rrc);
  1791. if (Lmin++ >= Lmax) RRETURN(MATCH_NOMATCH);
  1792. if (Feptr >= mb->end_subject)
  1793. {
  1794. SCHECK_PARTIAL();
  1795. RRETURN(MATCH_NOMATCH);
  1796. }
  1797. GETCHARINCTEST(fc, Feptr);
  1798. if (!PRIV(xclass)(fc, Lxclass_data, utf)) RRETURN(MATCH_NOMATCH);
  1799. }
  1800. /* Control never gets here */
  1801. }
  1802. /* If maximizing, find the longest possible run, then work backwards. */
  1803. else
  1804. {
  1805. Lstart_eptr = Feptr;
  1806. for (i = Lmin; i < Lmax; i++)
  1807. {
  1808. int len = 1;
  1809. if (Feptr >= mb->end_subject)
  1810. {
  1811. SCHECK_PARTIAL();
  1812. break;
  1813. }
  1814. #ifdef SUPPORT_UNICODE
  1815. GETCHARLENTEST(fc, Feptr, len);
  1816. #else
  1817. fc = *Feptr;
  1818. #endif
  1819. if (!PRIV(xclass)(fc, Lxclass_data, utf)) break;
  1820. Feptr += len;
  1821. }
  1822. if (reptype == REPTYPE_POS) continue; /* No backtracking */
  1823. for(;;)
  1824. {
  1825. RMATCH(Fecode, RM101);
  1826. if (rrc != MATCH_NOMATCH) RRETURN(rrc);
  1827. if (Feptr-- == Lstart_eptr) break; /* Tried at original position */
  1828. #ifdef SUPPORT_UNICODE
  1829. if (utf) BACKCHAR(Feptr);
  1830. #endif
  1831. }
  1832. RRETURN(MATCH_NOMATCH);
  1833. }
  1834. /* Control never gets here */
  1835. }
  1836. #endif /* SUPPORT_WIDE_CHARS: end of XCLASS */
  1837. #undef Lstart_eptr
  1838. #undef Lxclass_data
  1839. #undef Lmin
  1840. #undef Lmax
  1841. /* ===================================================================== */
  1842. /* Match various character types when PCRE2_UCP is not set. These opcodes
  1843. are not generated when PCRE2_UCP is set - instead appropriate property
  1844. tests are compiled. */
  1845. case OP_NOT_DIGIT:
  1846. if (Feptr >= mb->end_subject)
  1847. {
  1848. SCHECK_PARTIAL();
  1849. RRETURN(MATCH_NOMATCH);
  1850. }
  1851. GETCHARINCTEST(fc, Feptr);
  1852. if (CHMAX_255(fc) && (mb->ctypes[fc] & ctype_digit) != 0)
  1853. RRETURN(MATCH_NOMATCH);
  1854. Fecode++;
  1855. break;
  1856. case OP_DIGIT:
  1857. if (Feptr >= mb->end_subject)
  1858. {
  1859. SCHECK_PARTIAL();
  1860. RRETURN(MATCH_NOMATCH);
  1861. }
  1862. GETCHARINCTEST(fc, Feptr);
  1863. if (!CHMAX_255(fc) || (mb->ctypes[fc] & ctype_digit) == 0)
  1864. RRETURN(MATCH_NOMATCH);
  1865. Fecode++;
  1866. break;
  1867. case OP_NOT_WHITESPACE:
  1868. if (Feptr >= mb->end_subject)
  1869. {
  1870. SCHECK_PARTIAL();
  1871. RRETURN(MATCH_NOMATCH);
  1872. }
  1873. GETCHARINCTEST(fc, Feptr);
  1874. if (CHMAX_255(fc) && (mb->ctypes[fc] & ctype_space) != 0)
  1875. RRETURN(MATCH_NOMATCH);
  1876. Fecode++;
  1877. break;
  1878. case OP_WHITESPACE:
  1879. if (Feptr >= mb->end_subject)
  1880. {
  1881. SCHECK_PARTIAL();
  1882. RRETURN(MATCH_NOMATCH);
  1883. }
  1884. GETCHARINCTEST(fc, Feptr);
  1885. if (!CHMAX_255(fc) || (mb->ctypes[fc] & ctype_space) == 0)
  1886. RRETURN(MATCH_NOMATCH);
  1887. Fecode++;
  1888. break;
  1889. case OP_NOT_WORDCHAR:
  1890. if (Feptr >= mb->end_subject)
  1891. {
  1892. SCHECK_PARTIAL();
  1893. RRETURN(MATCH_NOMATCH);
  1894. }
  1895. GETCHARINCTEST(fc, Feptr);
  1896. if (CHMAX_255(fc) && (mb->ctypes[fc] & ctype_word) != 0)
  1897. RRETURN(MATCH_NOMATCH);
  1898. Fecode++;
  1899. break;
  1900. case OP_WORDCHAR:
  1901. if (Feptr >= mb->end_subject)
  1902. {
  1903. SCHECK_PARTIAL();
  1904. RRETURN(MATCH_NOMATCH);
  1905. }
  1906. GETCHARINCTEST(fc, Feptr);
  1907. if (!CHMAX_255(fc) || (mb->ctypes[fc] & ctype_word) == 0)
  1908. RRETURN(MATCH_NOMATCH);
  1909. Fecode++;
  1910. break;
  1911. case OP_ANYNL:
  1912. if (Feptr >= mb->end_subject)
  1913. {
  1914. SCHECK_PARTIAL();
  1915. RRETURN(MATCH_NOMATCH);
  1916. }
  1917. GETCHARINCTEST(fc, Feptr);
  1918. switch(fc)
  1919. {
  1920. default: RRETURN(MATCH_NOMATCH);
  1921. case CHAR_CR:
  1922. if (Feptr >= mb->end_subject)
  1923. {
  1924. SCHECK_PARTIAL();
  1925. }
  1926. else if (UCHAR21TEST(Feptr) == CHAR_LF) Feptr++;
  1927. break;
  1928. case CHAR_LF:
  1929. break;
  1930. case CHAR_VT:
  1931. case CHAR_FF:
  1932. case CHAR_NEL:
  1933. #ifndef EBCDIC
  1934. case 0x2028:
  1935. case 0x2029:
  1936. #endif /* Not EBCDIC */
  1937. if (mb->bsr_convention == PCRE2_BSR_ANYCRLF) RRETURN(MATCH_NOMATCH);
  1938. break;
  1939. }
  1940. Fecode++;
  1941. break;
  1942. case OP_NOT_HSPACE:
  1943. if (Feptr >= mb->end_subject)
  1944. {
  1945. SCHECK_PARTIAL();
  1946. RRETURN(MATCH_NOMATCH);
  1947. }
  1948. GETCHARINCTEST(fc, Feptr);
  1949. switch(fc)
  1950. {
  1951. HSPACE_CASES: RRETURN(MATCH_NOMATCH); /* Byte and multibyte cases */
  1952. default: break;
  1953. }
  1954. Fecode++;
  1955. break;
  1956. case OP_HSPACE:
  1957. if (Feptr >= mb->end_subject)
  1958. {
  1959. SCHECK_PARTIAL();
  1960. RRETURN(MATCH_NOMATCH);
  1961. }
  1962. GETCHARINCTEST(fc, Feptr);
  1963. switch(fc)
  1964. {
  1965. HSPACE_CASES: break; /* Byte and multibyte cases */
  1966. default: RRETURN(MATCH_NOMATCH);
  1967. }
  1968. Fecode++;
  1969. break;
  1970. case OP_NOT_VSPACE:
  1971. if (Feptr >= mb->end_subject)
  1972. {
  1973. SCHECK_PARTIAL();
  1974. RRETURN(MATCH_NOMATCH);
  1975. }
  1976. GETCHARINCTEST(fc, Feptr);
  1977. switch(fc)
  1978. {
  1979. VSPACE_CASES: RRETURN(MATCH_NOMATCH);
  1980. default: break;
  1981. }
  1982. Fecode++;
  1983. break;
  1984. case OP_VSPACE:
  1985. if (Feptr >= mb->end_subject)
  1986. {
  1987. SCHECK_PARTIAL();
  1988. RRETURN(MATCH_NOMATCH);
  1989. }
  1990. GETCHARINCTEST(fc, Feptr);
  1991. switch(fc)
  1992. {
  1993. VSPACE_CASES: break;
  1994. default: RRETURN(MATCH_NOMATCH);
  1995. }
  1996. Fecode++;
  1997. break;
  1998. #ifdef SUPPORT_UNICODE
  1999. /* ===================================================================== */
  2000. /* Check the next character by Unicode property. We will get here only
  2001. if the support is in the binary; otherwise a compile-time error occurs. */
  2002. case OP_PROP:
  2003. case OP_NOTPROP:
  2004. if (Feptr >= mb->end_subject)
  2005. {
  2006. SCHECK_PARTIAL();
  2007. RRETURN(MATCH_NOMATCH);
  2008. }
  2009. GETCHARINCTEST(fc, Feptr);
  2010. {
  2011. const uint32_t *cp;
  2012. const ucd_record *prop = GET_UCD(fc);
  2013. switch(Fecode[1])
  2014. {
  2015. case PT_ANY:
  2016. if (Fop == OP_NOTPROP) RRETURN(MATCH_NOMATCH);
  2017. break;
  2018. case PT_LAMP:
  2019. if ((prop->chartype == ucp_Lu ||
  2020. prop->chartype == ucp_Ll ||
  2021. prop->chartype == ucp_Lt) == (Fop == OP_NOTPROP))
  2022. RRETURN(MATCH_NOMATCH);
  2023. break;
  2024. case PT_GC:
  2025. if ((Fecode[2] != PRIV(ucp_gentype)[prop->chartype]) == (Fop == OP_PROP))
  2026. RRETURN(MATCH_NOMATCH);
  2027. break;
  2028. case PT_PC:
  2029. if ((Fecode[2] != prop->chartype) == (Fop == OP_PROP))
  2030. RRETURN(MATCH_NOMATCH);
  2031. break;
  2032. case PT_SC:
  2033. if ((Fecode[2] != prop->script) == (Fop == OP_PROP))
  2034. RRETURN(MATCH_NOMATCH);
  2035. break;
  2036. /* These are specials */
  2037. case PT_ALNUM:
  2038. if ((PRIV(ucp_gentype)[prop->chartype] == ucp_L ||
  2039. PRIV(ucp_gentype)[prop->chartype] == ucp_N) == (Fop == OP_NOTPROP))
  2040. RRETURN(MATCH_NOMATCH);
  2041. break;
  2042. /* Perl space used to exclude VT, but from Perl 5.18 it is included,
  2043. which means that Perl space and POSIX space are now identical. PCRE
  2044. was changed at release 8.34. */
  2045. case PT_SPACE: /* Perl space */
  2046. case PT_PXSPACE: /* POSIX space */
  2047. switch(fc)
  2048. {
  2049. HSPACE_CASES:
  2050. VSPACE_CASES:
  2051. if (Fop == OP_NOTPROP) RRETURN(MATCH_NOMATCH);
  2052. break;
  2053. default:
  2054. if ((PRIV(ucp_gentype)[prop->chartype] == ucp_Z) ==
  2055. (Fop == OP_NOTPROP)) RRETURN(MATCH_NOMATCH);
  2056. break;
  2057. }
  2058. break;
  2059. case PT_WORD:
  2060. if ((PRIV(ucp_gentype)[prop->chartype] == ucp_L ||
  2061. PRIV(ucp_gentype)[prop->chartype] == ucp_N ||
  2062. fc == CHAR_UNDERSCORE) == (Fop == OP_NOTPROP))
  2063. RRETURN(MATCH_NOMATCH);
  2064. break;
  2065. case PT_CLIST:
  2066. cp = PRIV(ucd_caseless_sets) + Fecode[2];
  2067. for (;;)
  2068. {
  2069. if (fc < *cp)
  2070. { if (Fop == OP_PROP) { RRETURN(MATCH_NOMATCH); } else break; }
  2071. if (fc == *cp++)
  2072. { if (Fop == OP_PROP) break; else { RRETURN(MATCH_NOMATCH); } }
  2073. }
  2074. break;
  2075. case PT_UCNC:
  2076. if ((fc == CHAR_DOLLAR_SIGN || fc == CHAR_COMMERCIAL_AT ||
  2077. fc == CHAR_GRAVE_ACCENT || (fc >= 0xa0 && fc <= 0xd7ff) ||
  2078. fc >= 0xe000) == (Fop == OP_NOTPROP))
  2079. RRETURN(MATCH_NOMATCH);
  2080. break;
  2081. /* This should never occur */
  2082. default:
  2083. return PCRE2_ERROR_INTERNAL;
  2084. }
  2085. Fecode += 3;
  2086. }
  2087. break;
  2088. /* ===================================================================== */
  2089. /* Match an extended Unicode sequence. We will get here only if the support
  2090. is in the binary; otherwise a compile-time error occurs. */
  2091. case OP_EXTUNI:
  2092. if (Feptr >= mb->end_subject)
  2093. {
  2094. SCHECK_PARTIAL();
  2095. RRETURN(MATCH_NOMATCH);
  2096. }
  2097. else
  2098. {
  2099. GETCHARINCTEST(fc, Feptr);
  2100. Feptr = PRIV(extuni)(fc, Feptr, mb->start_subject, mb->end_subject, utf,
  2101. NULL);
  2102. }
  2103. CHECK_PARTIAL();
  2104. Fecode++;
  2105. break;
  2106. #endif /* SUPPORT_UNICODE */
  2107. /* ===================================================================== */
  2108. /* Match a single character type repeatedly. Note that the property type
  2109. does not need to be in a stack frame as it is not used within an RMATCH()
  2110. loop. */
  /* Short names for the frame fields used by the repeated character-type
  opcodes that follow. Lmin and Lmax hold the lower and upper repeat counts,
  Lctype holds the character-type opcode read after the repeat opcode, and
  Lpropvalue holds the property value that is read only when Lctype is
  OP_PROP or OP_NOTPROP. Lstart_eptr names F->temp_sptr[0]; presumably the
  subject position at which the repeat began - confirm against its uses. */
  2111. #define Lstart_eptr F->temp_sptr[0]
  2112. #define Lmin F->temp_32[0]
  2113. #define Lmax F->temp_32[1]
  2114. #define Lctype F->temp_32[2]
  2115. #define Lpropvalue F->temp_32[3]
  2116. case OP_TYPEEXACT:
  2117. Lmin = Lmax = GET2(Fecode, 1);
  2118. Fecode += 1 + IMM2_SIZE;
  2119. goto REPEATTYPE;
  2120. case OP_TYPEUPTO:
  2121. case OP_TYPEMINUPTO:
  2122. Lmin = 0;
  2123. Lmax = GET2(Fecode, 1);
  2124. reptype = (*Fecode == OP_TYPEMINUPTO)? REPTYPE_MIN : REPTYPE_MAX;
  2125. Fecode += 1 + IMM2_SIZE;
  2126. goto REPEATTYPE;
  2127. case OP_TYPEPOSSTAR:
  2128. reptype = REPTYPE_POS;
  2129. Lmin = 0;
  2130. Lmax = UINT32_MAX;
  2131. Fecode++;
  2132. goto REPEATTYPE;
  2133. case OP_TYPEPOSPLUS:
  2134. reptype = REPTYPE_POS;
  2135. Lmin = 1;
  2136. Lmax = UINT32_MAX;
  2137. Fecode++;
  2138. goto REPEATTYPE;
  2139. case OP_TYPEPOSQUERY:
  2140. reptype = REPTYPE_POS;
  2141. Lmin = 0;
  2142. Lmax = 1;
  2143. Fecode++;
  2144. goto REPEATTYPE;
  2145. case OP_TYPEPOSUPTO:
  2146. reptype = REPTYPE_POS;
  2147. Lmin = 0;
  2148. Lmax = GET2(Fecode, 1);
  2149. Fecode += 1 + IMM2_SIZE;
  2150. goto REPEATTYPE;
  2151. case OP_TYPESTAR:
  2152. case OP_TYPEMINSTAR:
  2153. case OP_TYPEPLUS:
  2154. case OP_TYPEMINPLUS:
  2155. case OP_TYPEQUERY:
  2156. case OP_TYPEMINQUERY:
  2157. fc = *Fecode++ - OP_TYPESTAR;
  2158. Lmin = rep_min[fc];
  2159. Lmax = rep_max[fc];
  2160. reptype = rep_typ[fc];
  2161. /* Common code for all repeated character type matches. */
  2162. REPEATTYPE:
  2163. Lctype = *Fecode++; /* Code for the character type */
  2164. #ifdef SUPPORT_UNICODE
  2165. if (Lctype == OP_PROP || Lctype == OP_NOTPROP)
  2166. {
  2167. proptype = *Fecode++;
  2168. Lpropvalue = *Fecode++;
  2169. }
  2170. else proptype = -1;
  2171. #endif
  2172. /* First, ensure the minimum number of matches are present. Use inline
  2173. code for maximizing the speed, and do the type test once at the start
  2174. (i.e. keep it out of the loop). The code for UTF mode is separated out for
  2175. tidiness, except for Unicode property tests. */
  2176. if (Lmin > 0)
  2177. {
  2178. #ifdef SUPPORT_UNICODE
  2179. if (proptype >= 0) /* Property tests in all modes */
  2180. {
  2181. switch(proptype)
  2182. {
  2183. case PT_ANY:
  2184. if (Lctype == OP_NOTPROP) RRETURN(MATCH_NOMATCH);
  2185. for (i = 1; i <= Lmin; i++)
  2186. {
  2187. if (Feptr >= mb->end_subject)
  2188. {
  2189. SCHECK_PARTIAL();
  2190. RRETURN(MATCH_NOMATCH);
  2191. }
  2192. GETCHARINCTEST(fc, Feptr);
  2193. }
  2194. break;
  2195. case PT_LAMP:
  2196. for (i = 1; i <= Lmin; i++)
  2197. {
  2198. int chartype;
  2199. if (Feptr >= mb->end_subject)
  2200. {
  2201. SCHECK_PARTIAL();
  2202. RRETURN(MATCH_NOMATCH);
  2203. }
  2204. GETCHARINCTEST(fc, Feptr);
  2205. chartype = UCD_CHARTYPE(fc);
  2206. if ((chartype == ucp_Lu ||
  2207. chartype == ucp_Ll ||
  2208. chartype == ucp_Lt) == (Lctype == OP_NOTPROP))
  2209. RRETURN(MATCH_NOMATCH);
  2210. }
  2211. break;
  2212. case PT_GC:
  2213. for (i = 1; i <= Lmin; i++)
  2214. {
  2215. if (Feptr >= mb->end_subject)
  2216. {
  2217. SCHECK_PARTIAL();
  2218. RRETURN(MATCH_NOMATCH);
  2219. }
  2220. GETCHARINCTEST(fc, Feptr);
  2221. if ((UCD_CATEGORY(fc) == Lpropvalue) == (Lctype == OP_NOTPROP))
  2222. RRETURN(MATCH_NOMATCH);
  2223. }
  2224. break;
  2225. case PT_PC:
  2226. for (i = 1; i <= Lmin; i++)
  2227. {
  2228. if (Feptr >= mb->end_subject)
  2229. {
  2230. SCHECK_PARTIAL();
  2231. RRETURN(MATCH_NOMATCH);
  2232. }
  2233. GETCHARINCTEST(fc, Feptr);
  2234. if ((UCD_CHARTYPE(fc) == Lpropvalue) == (Lctype == OP_NOTPROP))
  2235. RRETURN(MATCH_NOMATCH);
  2236. }
  2237. break;
  2238. case PT_SC:
  2239. for (i = 1; i <= Lmin; i++)
  2240. {
  2241. if (Feptr >= mb->end_subject)
  2242. {
  2243. SCHECK_PARTIAL();
  2244. RRETURN(MATCH_NOMATCH);
  2245. }
  2246. GETCHARINCTEST(fc, Feptr);
  2247. if ((UCD_SCRIPT(fc) == Lpropvalue) == (Lctype == OP_NOTPROP))
  2248. RRETURN(MATCH_NOMATCH);
  2249. }
  2250. break;
  2251. case PT_ALNUM:
  2252. for (i = 1; i <= Lmin; i++)
  2253. {
  2254. int category;
  2255. if (Feptr >= mb->end_subject)
  2256. {
  2257. SCHECK_PARTIAL();
  2258. RRETURN(MATCH_NOMATCH);
  2259. }
  2260. GETCHARINCTEST(fc, Feptr);
  2261. category = UCD_CATEGORY(fc);
  2262. if ((category == ucp_L || category == ucp_N) == (Lctype == OP_NOTPROP))
  2263. RRETURN(MATCH_NOMATCH);
  2264. }
  2265. break;
  2266. /* Perl space used to exclude VT, but from Perl 5.18 it is included,
  2267. which means that Perl space and POSIX space are now identical. PCRE
  2268. was changed at release 8.34. */
  2269. case PT_SPACE: /* Perl space */
  2270. case PT_PXSPACE: /* POSIX space */
  2271. for (i = 1; i <= Lmin; i++)
  2272. {
  2273. if (Feptr >= mb->end_subject)
  2274. {
  2275. SCHECK_PARTIAL();
  2276. RRETURN(MATCH_NOMATCH);
  2277. }
  2278. GETCHARINCTEST(fc, Feptr);
  2279. switch(fc)
  2280. {
  2281. HSPACE_CASES:
  2282. VSPACE_CASES:
  2283. if (Lctype == OP_NOTPROP) RRETURN(MATCH_NOMATCH);
  2284. break;
  2285. default:
  2286. if ((UCD_CATEGORY(fc) == ucp_Z) == (Lctype == OP_NOTPROP))
  2287. RRETURN(MATCH_NOMATCH);
  2288. break;
  2289. }
  2290. }
  2291. break;
  2292. case PT_WORD:
  2293. for (i = 1; i <= Lmin; i++)
  2294. {
  2295. int category;
  2296. if (Feptr >= mb->end_subject)
  2297. {
  2298. SCHECK_PARTIAL();
  2299. RRETURN(MATCH_NOMATCH);
  2300. }
  2301. GETCHARINCTEST(fc, Feptr);
  2302. category = UCD_CATEGORY(fc);
  2303. if ((category == ucp_L || category == ucp_N ||
  2304. fc == CHAR_UNDERSCORE) == (Lctype == OP_NOTPROP))
  2305. RRETURN(MATCH_NOMATCH);
  2306. }
  2307. break;
  2308. case PT_CLIST:
  2309. for (i = 1; i <= Lmin; i++)
  2310. {
  2311. const uint32_t *cp;
  2312. if (Feptr >= mb->end_subject)
  2313. {
  2314. SCHECK_PARTIAL();
  2315. RRETURN(MATCH_NOMATCH);
  2316. }
  2317. GETCHARINCTEST(fc, Feptr);
  2318. cp = PRIV(ucd_caseless_sets) + Lpropvalue;
  2319. for (;;)
  2320. {
  2321. if (fc < *cp)
  2322. {
  2323. if (Lctype == OP_NOTPROP) break;
  2324. RRETURN(MATCH_NOMATCH);
  2325. }
  2326. if (fc == *cp++)
  2327. {
  2328. if (Lctype == OP_NOTPROP) RRETURN(MATCH_NOMATCH);
  2329. break;
  2330. }
  2331. }
  2332. }
  2333. break;
  2334. case PT_UCNC:
  2335. for (i = 1; i <= Lmin; i++)
  2336. {
  2337. if (Feptr >= mb->end_subject)
  2338. {
  2339. SCHECK_PARTIAL();
  2340. RRETURN(MATCH_NOMATCH);
  2341. }
  2342. GETCHARINCTEST(fc, Feptr);
  2343. if ((fc == CHAR_DOLLAR_SIGN || fc == CHAR_COMMERCIAL_AT ||
  2344. fc == CHAR_GRAVE_ACCENT || (fc >= 0xa0 && fc <= 0xd7ff) ||
  2345. fc >= 0xe000) == (Lctype == OP_NOTPROP))
  2346. RRETURN(MATCH_NOMATCH);
  2347. }
  2348. break;
  2349. /* This should not occur */
  2350. default:
  2351. return PCRE2_ERROR_INTERNAL;
  2352. }
  2353. }
  2354. /* Match extended Unicode sequences. We will get here only if the
  2355. support is in the binary; otherwise a compile-time error occurs. */
  2356. else if (Lctype == OP_EXTUNI)
  2357. {
  2358. for (i = 1; i <= Lmin; i++)
  2359. {
  2360. if (Feptr >= mb->end_subject)
  2361. {
  2362. SCHECK_PARTIAL();
  2363. RRETURN(MATCH_NOMATCH);
  2364. }
  2365. else
  2366. {
  2367. GETCHARINCTEST(fc, Feptr);
  2368. Feptr = PRIV(extuni)(fc, Feptr, mb->start_subject,
  2369. mb->end_subject, utf, NULL);
  2370. }
  2371. CHECK_PARTIAL();
  2372. }
  2373. }
  2374. else
  2375. #endif /* SUPPORT_UNICODE */
  2376. /* Handle all other cases in UTF mode */
  2377. #ifdef SUPPORT_UNICODE
  2378. if (utf) switch(Lctype)
  2379. {
  2380. case OP_ANY:
  2381. for (i = 1; i <= Lmin; i++)
  2382. {
  2383. if (Feptr >= mb->end_subject)
  2384. {
  2385. SCHECK_PARTIAL();
  2386. RRETURN(MATCH_NOMATCH);
  2387. }
  2388. if (IS_NEWLINE(Feptr)) RRETURN(MATCH_NOMATCH);
  2389. if (mb->partial != 0 &&
  2390. Feptr + 1 >= mb->end_subject &&
  2391. NLBLOCK->nltype == NLTYPE_FIXED &&
  2392. NLBLOCK->nllen == 2 &&
  2393. UCHAR21(Feptr) == NLBLOCK->nl[0])
  2394. {
  2395. mb->hitend = TRUE;
  2396. if (mb->partial > 1) return PCRE2_ERROR_PARTIAL;
  2397. }
  2398. Feptr++;
  2399. ACROSSCHAR(Feptr < mb->end_subject, Feptr, Feptr++);
  2400. }
  2401. break;
  2402. case OP_ALLANY:
  2403. for (i = 1; i <= Lmin; i++)
  2404. {
  2405. if (Feptr >= mb->end_subject)
  2406. {
  2407. SCHECK_PARTIAL();
  2408. RRETURN(MATCH_NOMATCH);
  2409. }
  2410. Feptr++;
  2411. ACROSSCHAR(Feptr < mb->end_subject, Feptr, Feptr++);
  2412. }
  2413. break;
  2414. case OP_ANYBYTE:
  2415. if (Feptr > mb->end_subject - Lmin) RRETURN(MATCH_NOMATCH);
  2416. Feptr += Lmin;
  2417. break;
  2418. case OP_ANYNL:
  2419. for (i = 1; i <= Lmin; i++)
  2420. {
  2421. if (Feptr >= mb->end_subject)
  2422. {
  2423. SCHECK_PARTIAL();
  2424. RRETURN(MATCH_NOMATCH);
  2425. }
  2426. GETCHARINC(fc, Feptr);
  2427. switch(fc)
  2428. {
  2429. default: RRETURN(MATCH_NOMATCH);
  2430. case CHAR_CR:
  2431. if (Feptr < mb->end_subject && UCHAR21(Feptr) == CHAR_LF) Feptr++;
  2432. break;
  2433. case CHAR_LF:
  2434. break;
  2435. case CHAR_VT:
  2436. case CHAR_FF:
  2437. case CHAR_NEL:
  2438. #ifndef EBCDIC
  2439. case 0x2028:
  2440. case 0x2029:
  2441. #endif /* Not EBCDIC */
  2442. if (mb->bsr_convention == PCRE2_BSR_ANYCRLF) RRETURN(MATCH_NOMATCH);
  2443. break;
  2444. }
  2445. }
  2446. break;
  2447. case OP_NOT_HSPACE:
  2448. for (i = 1; i <= Lmin; i++)
  2449. {
  2450. if (Feptr >= mb->end_subject)
  2451. {
  2452. SCHECK_PARTIAL();
  2453. RRETURN(MATCH_NOMATCH);
  2454. }
  2455. GETCHARINC(fc, Feptr);
  2456. switch(fc)
  2457. {
  2458. HSPACE_CASES: RRETURN(MATCH_NOMATCH);
  2459. default: break;
  2460. }
  2461. }
  2462. break;
  2463. case OP_HSPACE:
  2464. for (i = 1; i <= Lmin; i++)
  2465. {
  2466. if (Feptr >= mb->end_subject)
  2467. {
  2468. SCHECK_PARTIAL();
  2469. RRETURN(MATCH_NOMATCH);
  2470. }
  2471. GETCHARINC(fc, Feptr);
  2472. switch(fc)
  2473. {
  2474. HSPACE_CASES: break;
  2475. default: RRETURN(MATCH_NOMATCH);
  2476. }
  2477. }
  2478. break;
  2479. case OP_NOT_VSPACE:
  2480. for (i = 1; i <= Lmin; i++)
  2481. {
  2482. if (Feptr >= mb->end_subject)
  2483. {
  2484. SCHECK_PARTIAL();
  2485. RRETURN(MATCH_NOMATCH);
  2486. }
  2487. GETCHARINC(fc, Feptr);
  2488. switch(fc)
  2489. {
  2490. VSPACE_CASES: RRETURN(MATCH_NOMATCH);
  2491. default: break;
  2492. }
  2493. }
  2494. break;
  2495. case OP_VSPACE:
  2496. for (i = 1; i <= Lmin; i++)
  2497. {
  2498. if (Feptr >= mb->end_subject)
  2499. {
  2500. SCHECK_PARTIAL();
  2501. RRETURN(MATCH_NOMATCH);
  2502. }
  2503. GETCHARINC(fc, Feptr);
  2504. switch(fc)
  2505. {
  2506. VSPACE_CASES: break;
  2507. default: RRETURN(MATCH_NOMATCH);
  2508. }
  2509. }
  2510. break;
  2511. case OP_NOT_DIGIT:
  2512. for (i = 1; i <= Lmin; i++)
  2513. {
  2514. if (Feptr >= mb->end_subject)
  2515. {
  2516. SCHECK_PARTIAL();
  2517. RRETURN(MATCH_NOMATCH);
  2518. }
  2519. GETCHARINC(fc, Feptr);
  2520. if (fc < 128 && (mb->ctypes[fc] & ctype_digit) != 0)
  2521. RRETURN(MATCH_NOMATCH);
  2522. }
  2523. break;
  2524. case OP_DIGIT:
  2525. for (i = 1; i <= Lmin; i++)
  2526. {
  2527. uint32_t cc;
  2528. if (Feptr >= mb->end_subject)
  2529. {
  2530. SCHECK_PARTIAL();
  2531. RRETURN(MATCH_NOMATCH);
  2532. }
  2533. cc = UCHAR21(Feptr);
  2534. if (cc >= 128 || (mb->ctypes[cc] & ctype_digit) == 0)
  2535. RRETURN(MATCH_NOMATCH);
  2536. Feptr++;
  2537. /* No need to skip more code units - we know it has only one. */
  2538. }
  2539. break;
  2540. case OP_NOT_WHITESPACE:
  2541. for (i = 1; i <= Lmin; i++)
  2542. {
  2543. uint32_t cc;
  2544. if (Feptr >= mb->end_subject)
  2545. {
  2546. SCHECK_PARTIAL();
  2547. RRETURN(MATCH_NOMATCH);
  2548. }
  2549. cc = UCHAR21(Feptr);
  2550. if (cc < 128 && (mb->ctypes[cc] & ctype_space) != 0)
  2551. RRETURN(MATCH_NOMATCH);
  2552. Feptr++;
  2553. ACROSSCHAR(Feptr < mb->end_subject, Feptr, Feptr++);
  2554. }
  2555. break;
  2556. case OP_WHITESPACE:
  2557. for (i = 1; i <= Lmin; i++)
  2558. {
  2559. uint32_t cc;
  2560. if (Feptr >= mb->end_subject)
  2561. {
  2562. SCHECK_PARTIAL();
  2563. RRETURN(MATCH_NOMATCH);
  2564. }
  2565. cc = UCHAR21(Feptr);
  2566. if (cc >= 128 || (mb->ctypes[cc] & ctype_space) == 0)
  2567. RRETURN(MATCH_NOMATCH);
  2568. Feptr++;
  2569. /* No need to skip more code units - we know it has only one. */
  2570. }
  2571. break;
  2572. case OP_NOT_WORDCHAR:
  2573. for (i = 1; i <= Lmin; i++)
  2574. {
  2575. uint32_t cc;
  2576. if (Feptr >= mb->end_subject)
  2577. {
  2578. SCHECK_PARTIAL();
  2579. RRETURN(MATCH_NOMATCH);
  2580. }
  2581. cc = UCHAR21(Feptr);
  2582. if (cc < 128 && (mb->ctypes[cc] & ctype_word) != 0)
  2583. RRETURN(MATCH_NOMATCH);
  2584. Feptr++;
  2585. ACROSSCHAR(Feptr < mb->end_subject, Feptr, Feptr++);
  2586. }
  2587. break;
  2588. case OP_WORDCHAR:
  2589. for (i = 1; i <= Lmin; i++)
  2590. {
  2591. uint32_t cc;
  2592. if (Feptr >= mb->end_subject)
  2593. {
  2594. SCHECK_PARTIAL();
  2595. RRETURN(MATCH_NOMATCH);
  2596. }
  2597. cc = UCHAR21(Feptr);
  2598. if (cc >= 128 || (mb->ctypes[cc] & ctype_word) == 0)
  2599. RRETURN(MATCH_NOMATCH);
  2600. Feptr++;
  2601. /* No need to skip more code units - we know it has only one. */
  2602. }
  2603. break;
  2604. default:
  2605. return PCRE2_ERROR_INTERNAL;
  2606. } /* End switch(Lctype) */
  2607. else
  2608. #endif /* SUPPORT_UNICODE */
  2609. /* Code for the non-UTF case for minimum matching of operators other
  2610. than OP_PROP and OP_NOTPROP. */
  2611. switch(Lctype)
  2612. {
  2613. case OP_ANY:
  2614. for (i = 1; i <= Lmin; i++)
  2615. {
  2616. if (Feptr >= mb->end_subject)
  2617. {
  2618. SCHECK_PARTIAL();
  2619. RRETURN(MATCH_NOMATCH);
  2620. }
  2621. if (IS_NEWLINE(Feptr)) RRETURN(MATCH_NOMATCH);
  2622. if (mb->partial != 0 &&
  2623. Feptr + 1 >= mb->end_subject &&
  2624. NLBLOCK->nltype == NLTYPE_FIXED &&
  2625. NLBLOCK->nllen == 2 &&
  2626. *Feptr == NLBLOCK->nl[0])
  2627. {
  2628. mb->hitend = TRUE;
  2629. if (mb->partial > 1) return PCRE2_ERROR_PARTIAL;
  2630. }
  2631. Feptr++;
  2632. }
  2633. break;
  2634. case OP_ALLANY:
  2635. if (Feptr > mb->end_subject - Lmin)
  2636. {
  2637. SCHECK_PARTIAL();
  2638. RRETURN(MATCH_NOMATCH);
  2639. }
  2640. Feptr += Lmin;
  2641. break;
  2642. /* This OP_ANYBYTE case will never be reached because \C gets turned
  2643. into OP_ALLANY in non-UTF mode. Cut out the code so that coverage
  2644. reports don't complain about its never being used. */
  2645. /* case OP_ANYBYTE:
  2646. * if (Feptr > mb->end_subject - Lmin)
  2647. * {
  2648. * SCHECK_PARTIAL();
  2649. * RRETURN(MATCH_NOMATCH);
  2650. * }
  2651. * Feptr += Lmin;
  2652. * break;
  2653. */
  2654. case OP_ANYNL:
  2655. for (i = 1; i <= Lmin; i++)
  2656. {
  2657. if (Feptr >= mb->end_subject)
  2658. {
  2659. SCHECK_PARTIAL();
  2660. RRETURN(MATCH_NOMATCH);
  2661. }
  2662. switch(*Feptr++)
  2663. {
  2664. default: RRETURN(MATCH_NOMATCH);
  2665. case CHAR_CR:
  2666. if (Feptr < mb->end_subject && *Feptr == CHAR_LF) Feptr++;
  2667. break;
  2668. case CHAR_LF:
  2669. break;
  2670. case CHAR_VT:
  2671. case CHAR_FF:
  2672. case CHAR_NEL:
  2673. #if PCRE2_CODE_UNIT_WIDTH != 8
  2674. case 0x2028:
  2675. case 0x2029:
  2676. #endif
  2677. if (mb->bsr_convention == PCRE2_BSR_ANYCRLF) RRETURN(MATCH_NOMATCH);
  2678. break;
  2679. }
  2680. }
  2681. break;
  2682. case OP_NOT_HSPACE:
  2683. for (i = 1; i <= Lmin; i++)
  2684. {
  2685. if (Feptr >= mb->end_subject)
  2686. {
  2687. SCHECK_PARTIAL();
  2688. RRETURN(MATCH_NOMATCH);
  2689. }
  2690. switch(*Feptr++)
  2691. {
  2692. default: break;
  2693. HSPACE_BYTE_CASES:
  2694. #if PCRE2_CODE_UNIT_WIDTH != 8
  2695. HSPACE_MULTIBYTE_CASES:
  2696. #endif
  2697. RRETURN(MATCH_NOMATCH);
  2698. }
  2699. }
  2700. break;
  2701. case OP_HSPACE:
  2702. for (i = 1; i <= Lmin; i++)
  2703. {
  2704. if (Feptr >= mb->end_subject)
  2705. {
  2706. SCHECK_PARTIAL();
  2707. RRETURN(MATCH_NOMATCH);
  2708. }
  2709. switch(*Feptr++)
  2710. {
  2711. default: RRETURN(MATCH_NOMATCH);
  2712. HSPACE_BYTE_CASES:
  2713. #if PCRE2_CODE_UNIT_WIDTH != 8
  2714. HSPACE_MULTIBYTE_CASES:
  2715. #endif
  2716. break;
  2717. }
  2718. }
  2719. break;
  2720. case OP_NOT_VSPACE:
  2721. for (i = 1; i <= Lmin; i++)
  2722. {
  2723. if (Feptr >= mb->end_subject)
  2724. {
  2725. SCHECK_PARTIAL();
  2726. RRETURN(MATCH_NOMATCH);
  2727. }
  2728. switch(*Feptr++)
  2729. {
  2730. VSPACE_BYTE_CASES:
  2731. #if PCRE2_CODE_UNIT_WIDTH != 8
  2732. VSPACE_MULTIBYTE_CASES:
  2733. #endif
  2734. RRETURN(MATCH_NOMATCH);
  2735. default: break;
  2736. }
  2737. }
  2738. break;
  2739. case OP_VSPACE:
  2740. for (i = 1; i <= Lmin; i++)
  2741. {
  2742. if (Feptr >= mb->end_subject)
  2743. {
  2744. SCHECK_PARTIAL();
  2745. RRETURN(MATCH_NOMATCH);
  2746. }
  2747. switch(*Feptr++)
  2748. {
  2749. default: RRETURN(MATCH_NOMATCH);
  2750. VSPACE_BYTE_CASES:
  2751. #if PCRE2_CODE_UNIT_WIDTH != 8
  2752. VSPACE_MULTIBYTE_CASES:
  2753. #endif
  2754. break;
  2755. }
  2756. }
  2757. break;
  2758. case OP_NOT_DIGIT:
  2759. for (i = 1; i <= Lmin; i++)
  2760. {
  2761. if (Feptr >= mb->end_subject)
  2762. {
  2763. SCHECK_PARTIAL();
  2764. RRETURN(MATCH_NOMATCH);
  2765. }
  2766. if (MAX_255(*Feptr) && (mb->ctypes[*Feptr] & ctype_digit) != 0)
  2767. RRETURN(MATCH_NOMATCH);
  2768. Feptr++;
  2769. }
  2770. break;
  2771. case OP_DIGIT:
  2772. for (i = 1; i <= Lmin; i++)
  2773. {
  2774. if (Feptr >= mb->end_subject)
  2775. {
  2776. SCHECK_PARTIAL();
  2777. RRETURN(MATCH_NOMATCH);
  2778. }
  2779. if (!MAX_255(*Feptr) || (mb->ctypes[*Feptr] & ctype_digit) == 0)
  2780. RRETURN(MATCH_NOMATCH);
  2781. Feptr++;
  2782. }
  2783. break;
  2784. case OP_NOT_WHITESPACE:
  2785. for (i = 1; i <= Lmin; i++)
  2786. {
  2787. if (Feptr >= mb->end_subject)
  2788. {
  2789. SCHECK_PARTIAL();
  2790. RRETURN(MATCH_NOMATCH);
  2791. }
  2792. if (MAX_255(*Feptr) && (mb->ctypes[*Feptr] & ctype_space) != 0)
  2793. RRETURN(MATCH_NOMATCH);
  2794. Feptr++;
  2795. }
  2796. break;
  2797. case OP_WHITESPACE:
  2798. for (i = 1; i <= Lmin; i++)
  2799. {
  2800. if (Feptr >= mb->end_subject)
  2801. {
  2802. SCHECK_PARTIAL();
  2803. RRETURN(MATCH_NOMATCH);
  2804. }
  2805. if (!MAX_255(*Feptr) || (mb->ctypes[*Feptr] & ctype_space) == 0)
  2806. RRETURN(MATCH_NOMATCH);
  2807. Feptr++;
  2808. }
  2809. break;
  2810. case OP_NOT_WORDCHAR:
  2811. for (i = 1; i <= Lmin; i++)
  2812. {
  2813. if (Feptr >= mb->end_subject)
  2814. {
  2815. SCHECK_PARTIAL();
  2816. RRETURN(MATCH_NOMATCH);
  2817. }
  2818. if (MAX_255(*Feptr) && (mb->ctypes[*Feptr] & ctype_word) != 0)
  2819. RRETURN(MATCH_NOMATCH);
  2820. Feptr++;
  2821. }
  2822. break;
  2823. case OP_WORDCHAR:
  2824. for (i = 1; i <= Lmin; i++)
  2825. {
  2826. if (Feptr >= mb->end_subject)
  2827. {
  2828. SCHECK_PARTIAL();
  2829. RRETURN(MATCH_NOMATCH);
  2830. }
  2831. if (!MAX_255(*Feptr) || (mb->ctypes[*Feptr] & ctype_word) == 0)
  2832. RRETURN(MATCH_NOMATCH);
  2833. Feptr++;
  2834. }
  2835. break;
  2836. default:
  2837. return PCRE2_ERROR_INTERNAL;
  2838. }
  2839. }
  2840. /* If Lmin = Lmax we are done. Continue with the main loop. */
  2841. if (Lmin == Lmax) continue;
  2842. /* If minimizing, we have to test the rest of the pattern before each
  2843. subsequent match. */
  2844. if (reptype == REPTYPE_MIN)
  2845. {
  2846. #ifdef SUPPORT_UNICODE
  2847. if (proptype >= 0)
  2848. {
  2849. switch(proptype)
  2850. {
  2851. case PT_ANY:
  2852. for (;;)
  2853. {
  2854. RMATCH(Fecode, RM208);
  2855. if (rrc != MATCH_NOMATCH) RRETURN(rrc);
  2856. if (Lmin++ >= Lmax) RRETURN(MATCH_NOMATCH);
  2857. if (Feptr >= mb->end_subject)
  2858. {
  2859. SCHECK_PARTIAL();
  2860. RRETURN(MATCH_NOMATCH);
  2861. }
  2862. GETCHARINCTEST(fc, Feptr);
  2863. if (Lctype == OP_NOTPROP) RRETURN(MATCH_NOMATCH);
  2864. }
  2865. /* Control never gets here */
  2866. case PT_LAMP:
  2867. for (;;)
  2868. {
  2869. int chartype;
  2870. RMATCH(Fecode, RM209);
  2871. if (rrc != MATCH_NOMATCH) RRETURN(rrc);
  2872. if (Lmin++ >= Lmax) RRETURN(MATCH_NOMATCH);
  2873. if (Feptr >= mb->end_subject)
  2874. {
  2875. SCHECK_PARTIAL();
  2876. RRETURN(MATCH_NOMATCH);
  2877. }
  2878. GETCHARINCTEST(fc, Feptr);
  2879. chartype = UCD_CHARTYPE(fc);
  2880. if ((chartype == ucp_Lu ||
  2881. chartype == ucp_Ll ||
  2882. chartype == ucp_Lt) == (Lctype == OP_NOTPROP))
  2883. RRETURN(MATCH_NOMATCH);
  2884. }
  2885. /* Control never gets here */
  2886. case PT_GC:
  2887. for (;;)
  2888. {
  2889. RMATCH(Fecode, RM210);
  2890. if (rrc != MATCH_NOMATCH) RRETURN(rrc);
  2891. if (Lmin++ >= Lmax) RRETURN(MATCH_NOMATCH);
  2892. if (Feptr >= mb->end_subject)
  2893. {
  2894. SCHECK_PARTIAL();
  2895. RRETURN(MATCH_NOMATCH);
  2896. }
  2897. GETCHARINCTEST(fc, Feptr);
  2898. if ((UCD_CATEGORY(fc) == Lpropvalue) == (Lctype == OP_NOTPROP))
  2899. RRETURN(MATCH_NOMATCH);
  2900. }
  2901. /* Control never gets here */
  2902. case PT_PC:
  2903. for (;;)
  2904. {
  2905. RMATCH(Fecode, RM211);
  2906. if (rrc != MATCH_NOMATCH) RRETURN(rrc);
  2907. if (Lmin++ >= Lmax) RRETURN(MATCH_NOMATCH);
  2908. if (Feptr >= mb->end_subject)
  2909. {
  2910. SCHECK_PARTIAL();
  2911. RRETURN(MATCH_NOMATCH);
  2912. }
  2913. GETCHARINCTEST(fc, Feptr);
  2914. if ((UCD_CHARTYPE(fc) == Lpropvalue) == (Lctype == OP_NOTPROP))
  2915. RRETURN(MATCH_NOMATCH);
  2916. }
  2917. /* Control never gets here */
  2918. case PT_SC:
  2919. for (;;)
  2920. {
  2921. RMATCH(Fecode, RM212);
  2922. if (rrc != MATCH_NOMATCH) RRETURN(rrc);
  2923. if (Lmin++ >= Lmax) RRETURN(MATCH_NOMATCH);
  2924. if (Feptr >= mb->end_subject)
  2925. {
  2926. SCHECK_PARTIAL();
  2927. RRETURN(MATCH_NOMATCH);
  2928. }
  2929. GETCHARINCTEST(fc, Feptr);
  2930. if ((UCD_SCRIPT(fc) == Lpropvalue) == (Lctype == OP_NOTPROP))
  2931. RRETURN(MATCH_NOMATCH);
  2932. }
  2933. /* Control never gets here */
  2934. case PT_ALNUM:
  2935. for (;;)
  2936. {
  2937. int category;
  2938. RMATCH(Fecode, RM213);
  2939. if (rrc != MATCH_NOMATCH) RRETURN(rrc);
  2940. if (Lmin++ >= Lmax) RRETURN(MATCH_NOMATCH);
  2941. if (Feptr >= mb->end_subject)
  2942. {
  2943. SCHECK_PARTIAL();
  2944. RRETURN(MATCH_NOMATCH);
  2945. }
  2946. GETCHARINCTEST(fc, Feptr);
  2947. category = UCD_CATEGORY(fc);
  2948. if ((category == ucp_L || category == ucp_N) ==
  2949. (Lctype == OP_NOTPROP))
  2950. RRETURN(MATCH_NOMATCH);
  2951. }
  2952. /* Control never gets here */
  2953. /* Perl space used to exclude VT, but from Perl 5.18 it is included,
  2954. which means that Perl space and POSIX space are now identical. PCRE
  2955. was changed at release 8.34. */
  2956. case PT_SPACE: /* Perl space */
  2957. case PT_PXSPACE: /* POSIX space */
  2958. for (;;)
  2959. {
  2960. RMATCH(Fecode, RM214);
  2961. if (rrc != MATCH_NOMATCH) RRETURN(rrc);
  2962. if (Lmin++ >= Lmax) RRETURN(MATCH_NOMATCH);
  2963. if (Feptr >= mb->end_subject)
  2964. {
  2965. SCHECK_PARTIAL();
  2966. RRETURN(MATCH_NOMATCH);
  2967. }
  2968. GETCHARINCTEST(fc, Feptr);
  2969. switch(fc)
  2970. {
  2971. HSPACE_CASES:
  2972. VSPACE_CASES:
  2973. if (Lctype == OP_NOTPROP) RRETURN(MATCH_NOMATCH);
  2974. break;
  2975. default:
  2976. if ((UCD_CATEGORY(fc) == ucp_Z) == (Lctype == OP_NOTPROP))
  2977. RRETURN(MATCH_NOMATCH);
  2978. break;
  2979. }
  2980. }
  2981. /* Control never gets here */
  2982. case PT_WORD:
  2983. for (;;)
  2984. {
  2985. int category;
  2986. RMATCH(Fecode, RM215);
  2987. if (rrc != MATCH_NOMATCH) RRETURN(rrc);
  2988. if (Lmin++ >= Lmax) RRETURN(MATCH_NOMATCH);
  2989. if (Feptr >= mb->end_subject)
  2990. {
  2991. SCHECK_PARTIAL();
  2992. RRETURN(MATCH_NOMATCH);
  2993. }
  2994. GETCHARINCTEST(fc, Feptr);
  2995. category = UCD_CATEGORY(fc);
  2996. if ((category == ucp_L ||
  2997. category == ucp_N ||
  2998. fc == CHAR_UNDERSCORE) == (Lctype == OP_NOTPROP))
  2999. RRETURN(MATCH_NOMATCH);
  3000. }
  3001. /* Control never gets here */
  3002. case PT_CLIST:
  3003. for (;;)
  3004. {
  3005. const uint32_t *cp;
  3006. RMATCH(Fecode, RM216);
  3007. if (rrc != MATCH_NOMATCH) RRETURN(rrc);
  3008. if (Lmin++ >= Lmax) RRETURN(MATCH_NOMATCH);
  3009. if (Feptr >= mb->end_subject)
  3010. {
  3011. SCHECK_PARTIAL();
  3012. RRETURN(MATCH_NOMATCH);
  3013. }
  3014. GETCHARINCTEST(fc, Feptr);
  3015. cp = PRIV(ucd_caseless_sets) + Lpropvalue;
  3016. for (;;)
  3017. {
  3018. if (fc < *cp)
  3019. {
  3020. if (Lctype == OP_NOTPROP) break;
  3021. RRETURN(MATCH_NOMATCH);
  3022. }
  3023. if (fc == *cp++)
  3024. {
  3025. if (Lctype == OP_NOTPROP) RRETURN(MATCH_NOMATCH);
  3026. break;
  3027. }
  3028. }
  3029. }
  3030. /* Control never gets here */
  3031. case PT_UCNC:
  3032. for (;;)
  3033. {
  3034. RMATCH(Fecode, RM217);
  3035. if (rrc != MATCH_NOMATCH) RRETURN(rrc);
  3036. if (Lmin++ >= Lmax) RRETURN(MATCH_NOMATCH);
  3037. if (Feptr >= mb->end_subject)
  3038. {
  3039. SCHECK_PARTIAL();
  3040. RRETURN(MATCH_NOMATCH);
  3041. }
  3042. GETCHARINCTEST(fc, Feptr);
  3043. if ((fc == CHAR_DOLLAR_SIGN || fc == CHAR_COMMERCIAL_AT ||
  3044. fc == CHAR_GRAVE_ACCENT || (fc >= 0xa0 && fc <= 0xd7ff) ||
  3045. fc >= 0xe000) == (Lctype == OP_NOTPROP))
  3046. RRETURN(MATCH_NOMATCH);
  3047. }
  3048. /* Control never gets here */
  3049. /* This should never occur */
  3050. default:
  3051. return PCRE2_ERROR_INTERNAL;
  3052. }
  3053. }
  3054. /* Match extended Unicode sequences. We will get here only if the
  3055. support is in the binary; otherwise a compile-time error occurs. */
  3056. else if (Lctype == OP_EXTUNI)
  3057. {
  3058. for (;;)
  3059. {
  3060. RMATCH(Fecode, RM218);
  3061. if (rrc != MATCH_NOMATCH) RRETURN(rrc);
  3062. if (Lmin++ >= Lmax) RRETURN(MATCH_NOMATCH);
  3063. if (Feptr >= mb->end_subject)
  3064. {
  3065. SCHECK_PARTIAL();
  3066. RRETURN(MATCH_NOMATCH);
  3067. }
  3068. else
  3069. {
  3070. GETCHARINCTEST(fc, Feptr);
  3071. Feptr = PRIV(extuni)(fc, Feptr, mb->start_subject, mb->end_subject,
  3072. utf, NULL);
  3073. }
  3074. CHECK_PARTIAL();
  3075. }
  3076. }
  3077. else
  3078. #endif /* SUPPORT_UNICODE */
  3079. /* UTF mode for non-property testing character types. */
  3080. #ifdef SUPPORT_UNICODE
  3081. if (utf)
  3082. {
  3083. for (;;)
  3084. {
  3085. RMATCH(Fecode, RM219);
  3086. if (rrc != MATCH_NOMATCH) RRETURN(rrc);
  3087. if (Lmin++ >= Lmax) RRETURN(MATCH_NOMATCH);
  3088. if (Feptr >= mb->end_subject)
  3089. {
  3090. SCHECK_PARTIAL();
  3091. RRETURN(MATCH_NOMATCH);
  3092. }
  3093. if (Lctype == OP_ANY && IS_NEWLINE(Feptr)) RRETURN(MATCH_NOMATCH);
  3094. GETCHARINC(fc, Feptr);
  3095. switch(Lctype)
  3096. {
  3097. case OP_ANY: /* This is the non-NL case */
  3098. if (mb->partial != 0 && /* Take care with CRLF partial */
  3099. Feptr >= mb->end_subject &&
  3100. NLBLOCK->nltype == NLTYPE_FIXED &&
  3101. NLBLOCK->nllen == 2 &&
  3102. fc == NLBLOCK->nl[0])
  3103. {
  3104. mb->hitend = TRUE;
  3105. if (mb->partial > 1) return PCRE2_ERROR_PARTIAL;
  3106. }
  3107. break;
  3108. case OP_ALLANY:
  3109. case OP_ANYBYTE:
  3110. break;
  3111. case OP_ANYNL:
  3112. switch(fc)
  3113. {
  3114. default: RRETURN(MATCH_NOMATCH);
  3115. case CHAR_CR:
  3116. if (Feptr < mb->end_subject && UCHAR21(Feptr) == CHAR_LF) Feptr++;
  3117. break;
  3118. case CHAR_LF:
  3119. break;
  3120. case CHAR_VT:
  3121. case CHAR_FF:
  3122. case CHAR_NEL:
  3123. #ifndef EBCDIC
  3124. case 0x2028:
  3125. case 0x2029:
  3126. #endif /* Not EBCDIC */
  3127. if (mb->bsr_convention == PCRE2_BSR_ANYCRLF)
  3128. RRETURN(MATCH_NOMATCH);
  3129. break;
  3130. }
  3131. break;
  3132. case OP_NOT_HSPACE:
  3133. switch(fc)
  3134. {
  3135. HSPACE_CASES: RRETURN(MATCH_NOMATCH);
  3136. default: break;
  3137. }
  3138. break;
  3139. case OP_HSPACE:
  3140. switch(fc)
  3141. {
  3142. HSPACE_CASES: break;
  3143. default: RRETURN(MATCH_NOMATCH);
  3144. }
  3145. break;
  3146. case OP_NOT_VSPACE:
  3147. switch(fc)
  3148. {
  3149. VSPACE_CASES: RRETURN(MATCH_NOMATCH);
  3150. default: break;
  3151. }
  3152. break;
  3153. case OP_VSPACE:
  3154. switch(fc)
  3155. {
  3156. VSPACE_CASES: break;
  3157. default: RRETURN(MATCH_NOMATCH);
  3158. }
  3159. break;
  3160. case OP_NOT_DIGIT:
  3161. if (fc < 256 && (mb->ctypes[fc] & ctype_digit) != 0)
  3162. RRETURN(MATCH_NOMATCH);
  3163. break;
  3164. case OP_DIGIT:
  3165. if (fc >= 256 || (mb->ctypes[fc] & ctype_digit) == 0)
  3166. RRETURN(MATCH_NOMATCH);
  3167. break;
  3168. case OP_NOT_WHITESPACE:
  3169. if (fc < 256 && (mb->ctypes[fc] & ctype_space) != 0)
  3170. RRETURN(MATCH_NOMATCH);
  3171. break;
  3172. case OP_WHITESPACE:
  3173. if (fc >= 256 || (mb->ctypes[fc] & ctype_space) == 0)
  3174. RRETURN(MATCH_NOMATCH);
  3175. break;
  3176. case OP_NOT_WORDCHAR:
  3177. if (fc < 256 && (mb->ctypes[fc] & ctype_word) != 0)
  3178. RRETURN(MATCH_NOMATCH);
  3179. break;
  3180. case OP_WORDCHAR:
  3181. if (fc >= 256 || (mb->ctypes[fc] & ctype_word) == 0)
  3182. RRETURN(MATCH_NOMATCH);
  3183. break;
  3184. default:
  3185. return PCRE2_ERROR_INTERNAL;
  3186. }
  3187. }
  3188. }
  3189. else
  3190. #endif /* SUPPORT_UNICODE */
  3191. /* Not UTF mode */
  3192. {
  3193. for (;;)
  3194. {
  3195. RMATCH(Fecode, RM33);
  3196. if (rrc != MATCH_NOMATCH) RRETURN(rrc);
  3197. if (Lmin++ >= Lmax) RRETURN(MATCH_NOMATCH);
  3198. if (Feptr >= mb->end_subject)
  3199. {
  3200. SCHECK_PARTIAL();
  3201. RRETURN(MATCH_NOMATCH);
  3202. }
  3203. if (Lctype == OP_ANY && IS_NEWLINE(Feptr))
  3204. RRETURN(MATCH_NOMATCH);
  3205. fc = *Feptr++;
  3206. switch(Lctype)
  3207. {
  3208. case OP_ANY: /* This is the non-NL case */
  3209. if (mb->partial != 0 && /* Take care with CRLF partial */
  3210. Feptr >= mb->end_subject &&
  3211. NLBLOCK->nltype == NLTYPE_FIXED &&
  3212. NLBLOCK->nllen == 2 &&
  3213. fc == NLBLOCK->nl[0])
  3214. {
  3215. mb->hitend = TRUE;
  3216. if (mb->partial > 1) return PCRE2_ERROR_PARTIAL;
  3217. }
  3218. break;
  3219. case OP_ALLANY:
  3220. case OP_ANYBYTE:
  3221. break;
  3222. case OP_ANYNL:
  3223. switch(fc)
  3224. {
  3225. default: RRETURN(MATCH_NOMATCH);
  3226. case CHAR_CR:
  3227. if (Feptr < mb->end_subject && *Feptr == CHAR_LF) Feptr++;
  3228. break;
  3229. case CHAR_LF:
  3230. break;
  3231. case CHAR_VT:
  3232. case CHAR_FF:
  3233. case CHAR_NEL:
  3234. #if PCRE2_CODE_UNIT_WIDTH != 8
  3235. case 0x2028:
  3236. case 0x2029:
  3237. #endif
  3238. if (mb->bsr_convention == PCRE2_BSR_ANYCRLF)
  3239. RRETURN(MATCH_NOMATCH);
  3240. break;
  3241. }
  3242. break;
  3243. case OP_NOT_HSPACE:
  3244. switch(fc)
  3245. {
  3246. default: break;
  3247. HSPACE_BYTE_CASES:
  3248. #if PCRE2_CODE_UNIT_WIDTH != 8
  3249. HSPACE_MULTIBYTE_CASES:
  3250. #endif
  3251. RRETURN(MATCH_NOMATCH);
  3252. }
  3253. break;
  3254. case OP_HSPACE:
  3255. switch(fc)
  3256. {
  3257. default: RRETURN(MATCH_NOMATCH);
  3258. HSPACE_BYTE_CASES:
  3259. #if PCRE2_CODE_UNIT_WIDTH != 8
  3260. HSPACE_MULTIBYTE_CASES:
  3261. #endif
  3262. break;
  3263. }
  3264. break;
  3265. case OP_NOT_VSPACE:
  3266. switch(fc)
  3267. {
  3268. default: break;
  3269. VSPACE_BYTE_CASES:
  3270. #if PCRE2_CODE_UNIT_WIDTH != 8
  3271. VSPACE_MULTIBYTE_CASES:
  3272. #endif
  3273. RRETURN(MATCH_NOMATCH);
  3274. }
  3275. break;
  3276. case OP_VSPACE:
  3277. switch(fc)
  3278. {
  3279. default: RRETURN(MATCH_NOMATCH);
  3280. VSPACE_BYTE_CASES:
  3281. #if PCRE2_CODE_UNIT_WIDTH != 8
  3282. VSPACE_MULTIBYTE_CASES:
  3283. #endif
  3284. break;
  3285. }
  3286. break;
  3287. case OP_NOT_DIGIT:
  3288. if (MAX_255(fc) && (mb->ctypes[fc] & ctype_digit) != 0)
  3289. RRETURN(MATCH_NOMATCH);
  3290. break;
  3291. case OP_DIGIT:
  3292. if (!MAX_255(fc) || (mb->ctypes[fc] & ctype_digit) == 0)
  3293. RRETURN(MATCH_NOMATCH);
  3294. break;
  3295. case OP_NOT_WHITESPACE:
  3296. if (MAX_255(fc) && (mb->ctypes[fc] & ctype_space) != 0)
  3297. RRETURN(MATCH_NOMATCH);
  3298. break;
  3299. case OP_WHITESPACE:
  3300. if (!MAX_255(fc) || (mb->ctypes[fc] & ctype_space) == 0)
  3301. RRETURN(MATCH_NOMATCH);
  3302. break;
  3303. case OP_NOT_WORDCHAR:
  3304. if (MAX_255(fc) && (mb->ctypes[fc] & ctype_word) != 0)
  3305. RRETURN(MATCH_NOMATCH);
  3306. break;
  3307. case OP_WORDCHAR:
  3308. if (!MAX_255(fc) || (mb->ctypes[fc] & ctype_word) == 0)
  3309. RRETURN(MATCH_NOMATCH);
  3310. break;
  3311. default:
  3312. return PCRE2_ERROR_INTERNAL;
  3313. }
  3314. }
  3315. }
  3316. /* Control never gets here */
  3317. }
  3318. /* If maximizing, it is worth using inline code for speed, doing the type
  3319. test once at the start (i.e. keep it out of the loop). */
  3320. else
  3321. {
  3322. Lstart_eptr = Feptr; /* Remember where we started */
  3323. #ifdef SUPPORT_UNICODE
  3324. if (proptype >= 0)
  3325. {
  3326. switch(proptype)
  3327. {
  3328. case PT_ANY:
  3329. for (i = Lmin; i < Lmax; i++)
  3330. {
  3331. int len = 1;
  3332. if (Feptr >= mb->end_subject)
  3333. {
  3334. SCHECK_PARTIAL();
  3335. break;
  3336. }
  3337. GETCHARLENTEST(fc, Feptr, len);
  3338. if (Lctype == OP_NOTPROP) break;
  3339. Feptr+= len;
  3340. }
  3341. break;
  3342. case PT_LAMP:
  3343. for (i = Lmin; i < Lmax; i++)
  3344. {
  3345. int chartype;
  3346. int len = 1;
  3347. if (Feptr >= mb->end_subject)
  3348. {
  3349. SCHECK_PARTIAL();
  3350. break;
  3351. }
  3352. GETCHARLENTEST(fc, Feptr, len);
  3353. chartype = UCD_CHARTYPE(fc);
  3354. if ((chartype == ucp_Lu ||
  3355. chartype == ucp_Ll ||
  3356. chartype == ucp_Lt) == (Lctype == OP_NOTPROP))
  3357. break;
  3358. Feptr+= len;
  3359. }
  3360. break;
  3361. case PT_GC:
  3362. for (i = Lmin; i < Lmax; i++)
  3363. {
  3364. int len = 1;
  3365. if (Feptr >= mb->end_subject)
  3366. {
  3367. SCHECK_PARTIAL();
  3368. break;
  3369. }
  3370. GETCHARLENTEST(fc, Feptr, len);
  3371. if ((UCD_CATEGORY(fc) == Lpropvalue) == (Lctype == OP_NOTPROP))
  3372. break;
  3373. Feptr+= len;
  3374. }
  3375. break;
  3376. case PT_PC:
  3377. for (i = Lmin; i < Lmax; i++)
  3378. {
  3379. int len = 1;
  3380. if (Feptr >= mb->end_subject)
  3381. {
  3382. SCHECK_PARTIAL();
  3383. break;
  3384. }
  3385. GETCHARLENTEST(fc, Feptr, len);
  3386. if ((UCD_CHARTYPE(fc) == Lpropvalue) == (Lctype == OP_NOTPROP))
  3387. break;
  3388. Feptr+= len;
  3389. }
  3390. break;
  3391. case PT_SC:
  3392. for (i = Lmin; i < Lmax; i++)
  3393. {
  3394. int len = 1;
  3395. if (Feptr >= mb->end_subject)
  3396. {
  3397. SCHECK_PARTIAL();
  3398. break;
  3399. }
  3400. GETCHARLENTEST(fc, Feptr, len);
  3401. if ((UCD_SCRIPT(fc) == Lpropvalue) == (Lctype == OP_NOTPROP))
  3402. break;
  3403. Feptr+= len;
  3404. }
  3405. break;
  3406. case PT_ALNUM:
  3407. for (i = Lmin; i < Lmax; i++)
  3408. {
  3409. int category;
  3410. int len = 1;
  3411. if (Feptr >= mb->end_subject)
  3412. {
  3413. SCHECK_PARTIAL();
  3414. break;
  3415. }
  3416. GETCHARLENTEST(fc, Feptr, len);
  3417. category = UCD_CATEGORY(fc);
  3418. if ((category == ucp_L || category == ucp_N) ==
  3419. (Lctype == OP_NOTPROP))
  3420. break;
  3421. Feptr+= len;
  3422. }
  3423. break;
  3424. /* Perl space used to exclude VT, but from Perl 5.18 it is included,
  3425. which means that Perl space and POSIX space are now identical. PCRE
  3426. was changed at release 8.34. */
  3427. case PT_SPACE: /* Perl space */
  3428. case PT_PXSPACE: /* POSIX space */
  3429. for (i = Lmin; i < Lmax; i++)
  3430. {
  3431. int len = 1;
  3432. if (Feptr >= mb->end_subject)
  3433. {
  3434. SCHECK_PARTIAL();
  3435. break;
  3436. }
  3437. GETCHARLENTEST(fc, Feptr, len);
  3438. switch(fc)
  3439. {
  3440. HSPACE_CASES:
  3441. VSPACE_CASES:
  3442. if (Lctype == OP_NOTPROP) goto ENDLOOP99; /* Break the loop */
  3443. break;
  3444. default:
  3445. if ((UCD_CATEGORY(fc) == ucp_Z) == (Lctype == OP_NOTPROP))
  3446. goto ENDLOOP99; /* Break the loop */
  3447. break;
  3448. }
  3449. Feptr+= len;
  3450. }
  3451. ENDLOOP99:
  3452. break;
  3453. case PT_WORD:
  3454. for (i = Lmin; i < Lmax; i++)
  3455. {
  3456. int category;
  3457. int len = 1;
  3458. if (Feptr >= mb->end_subject)
  3459. {
  3460. SCHECK_PARTIAL();
  3461. break;
  3462. }
  3463. GETCHARLENTEST(fc, Feptr, len);
  3464. category = UCD_CATEGORY(fc);
  3465. if ((category == ucp_L || category == ucp_N ||
  3466. fc == CHAR_UNDERSCORE) == (Lctype == OP_NOTPROP))
  3467. break;
  3468. Feptr+= len;
  3469. }
  3470. break;
  3471. case PT_CLIST:
  3472. for (i = Lmin; i < Lmax; i++)
  3473. {
  3474. const uint32_t *cp;
  3475. int len = 1;
  3476. if (Feptr >= mb->end_subject)
  3477. {
  3478. SCHECK_PARTIAL();
  3479. break;
  3480. }
  3481. GETCHARLENTEST(fc, Feptr, len);
  3482. cp = PRIV(ucd_caseless_sets) + Lpropvalue;
  3483. for (;;)
  3484. {
  3485. if (fc < *cp)
  3486. { if (Lctype == OP_NOTPROP) break; else goto GOT_MAX; }
  3487. if (fc == *cp++)
  3488. { if (Lctype == OP_NOTPROP) goto GOT_MAX; else break; }
  3489. }
  3490. Feptr += len;
  3491. }
  3492. GOT_MAX:
  3493. break;
  3494. case PT_UCNC:
  3495. for (i = Lmin; i < Lmax; i++)
  3496. {
  3497. int len = 1;
  3498. if (Feptr >= mb->end_subject)
  3499. {
  3500. SCHECK_PARTIAL();
  3501. break;
  3502. }
  3503. GETCHARLENTEST(fc, Feptr, len);
  3504. if ((fc == CHAR_DOLLAR_SIGN || fc == CHAR_COMMERCIAL_AT ||
  3505. fc == CHAR_GRAVE_ACCENT || (fc >= 0xa0 && fc <= 0xd7ff) ||
  3506. fc >= 0xe000) == (Lctype == OP_NOTPROP))
  3507. break;
  3508. Feptr += len;
  3509. }
  3510. break;
  3511. default:
  3512. return PCRE2_ERROR_INTERNAL;
  3513. }
  3514. /* Feptr is now past the end of the maximum run */
  3515. if (reptype == REPTYPE_POS) continue; /* No backtracking */
  3516. /* After \C in UTF mode, Lstart_eptr might be in the middle of a
  3517. Unicode character. Use <= Lstart_eptr to ensure backtracking doesn't go
  3518. too far. */
  3519. for(;;)
  3520. {
  3521. if (Feptr <= Lstart_eptr) break;
  3522. RMATCH(Fecode, RM222);
  3523. if (rrc != MATCH_NOMATCH) RRETURN(rrc);
  3524. Feptr--;
  3525. if (utf) BACKCHAR(Feptr);
  3526. }
  3527. }
  3528. /* Match extended Unicode grapheme clusters. We will get here only if the
  3529. support is in the binary; otherwise a compile-time error occurs. */
  3530. else if (Lctype == OP_EXTUNI)
  3531. {
  3532. for (i = Lmin; i < Lmax; i++)
  3533. {
  3534. if (Feptr >= mb->end_subject)
  3535. {
  3536. SCHECK_PARTIAL();
  3537. break;
  3538. }
  3539. else
  3540. {
  3541. GETCHARINCTEST(fc, Feptr);
  3542. Feptr = PRIV(extuni)(fc, Feptr, mb->start_subject, mb->end_subject,
  3543. utf, NULL);
  3544. }
  3545. CHECK_PARTIAL();
  3546. }
  3547. /* Feptr is now past the end of the maximum run */
  3548. if (reptype == REPTYPE_POS) continue; /* No backtracking */
  3549. /* We use <= Lstart_eptr rather than == Lstart_eptr to detect the start
  3550. of the run while backtracking because the use of \C in UTF mode can
  3551. cause BACKCHAR to move back past Lstart_eptr. This is just palliative;
  3552. the use of \C in UTF mode is fraught with danger. */
  3553. for(;;)
  3554. {
  3555. int lgb, rgb;
  3556. PCRE2_SPTR fptr;
  3557. if (Feptr <= Lstart_eptr) break; /* At start of char run */
  3558. RMATCH(Fecode, RM220);
  3559. if (rrc != MATCH_NOMATCH) RRETURN(rrc);
  3560. /* Backtracking over an extended grapheme cluster involves inspecting
  3561. the previous two characters (if present) to see if a break is
  3562. permitted between them. */
  3563. Feptr--;
  3564. if (!utf) fc = *Feptr; else
  3565. {
  3566. BACKCHAR(Feptr);
  3567. GETCHAR(fc, Feptr);
  3568. }
  3569. rgb = UCD_GRAPHBREAK(fc);
  3570. for (;;)
  3571. {
  3572. if (Feptr <= Lstart_eptr) break; /* At start of char run */
  3573. fptr = Feptr - 1;
  3574. if (!utf) fc = *fptr; else
  3575. {
  3576. BACKCHAR(fptr);
  3577. GETCHAR(fc, fptr);
  3578. }
  3579. lgb = UCD_GRAPHBREAK(fc);
  3580. if ((PRIV(ucp_gbtable)[lgb] & (1 << rgb)) == 0) break;
  3581. Feptr = fptr;
  3582. rgb = lgb;
  3583. }
  3584. }
  3585. }
  3586. else
  3587. #endif /* SUPPORT_UNICODE */
  3588. #ifdef SUPPORT_UNICODE
  3589. if (utf)
  3590. {
  3591. switch(Lctype)
  3592. {
  3593. case OP_ANY:
  3594. for (i = Lmin; i < Lmax; i++)
  3595. {
  3596. if (Feptr >= mb->end_subject)
  3597. {
  3598. SCHECK_PARTIAL();
  3599. break;
  3600. }
  3601. if (IS_NEWLINE(Feptr)) break;
  3602. if (mb->partial != 0 && /* Take care with CRLF partial */
  3603. Feptr + 1 >= mb->end_subject &&
  3604. NLBLOCK->nltype == NLTYPE_FIXED &&
  3605. NLBLOCK->nllen == 2 &&
  3606. UCHAR21(Feptr) == NLBLOCK->nl[0])
  3607. {
  3608. mb->hitend = TRUE;
  3609. if (mb->partial > 1) return PCRE2_ERROR_PARTIAL;
  3610. }
  3611. Feptr++;
  3612. ACROSSCHAR(Feptr < mb->end_subject, Feptr, Feptr++);
  3613. }
  3614. break;
  3615. case OP_ALLANY:
  3616. if (Lmax < UINT32_MAX)
  3617. {
  3618. for (i = Lmin; i < Lmax; i++)
  3619. {
  3620. if (Feptr >= mb->end_subject)
  3621. {
  3622. SCHECK_PARTIAL();
  3623. break;
  3624. }
  3625. Feptr++;
  3626. ACROSSCHAR(Feptr < mb->end_subject, Feptr, Feptr++);
  3627. }
  3628. }
  3629. else
  3630. {
  3631. Feptr = mb->end_subject; /* Unlimited UTF-8 repeat */
  3632. SCHECK_PARTIAL();
  3633. }
  3634. break;
  3635. /* The "byte" (i.e. "code unit") case is the same as non-UTF */
  3636. case OP_ANYBYTE:
  3637. fc = Lmax - Lmin;
  3638. if (fc > (uint32_t)(mb->end_subject - Feptr))
  3639. {
  3640. Feptr = mb->end_subject;
  3641. SCHECK_PARTIAL();
  3642. }
  3643. else Feptr += fc;
  3644. break;
  3645. case OP_ANYNL:
  3646. for (i = Lmin; i < Lmax; i++)
  3647. {
  3648. int len = 1;
  3649. if (Feptr >= mb->end_subject)
  3650. {
  3651. SCHECK_PARTIAL();
  3652. break;
  3653. }
  3654. GETCHARLEN(fc, Feptr, len);
  3655. if (fc == CHAR_CR)
  3656. {
  3657. if (++Feptr >= mb->end_subject) break;
  3658. if (UCHAR21(Feptr) == CHAR_LF) Feptr++;
  3659. }
  3660. else
  3661. {
  3662. if (fc != CHAR_LF &&
  3663. (mb->bsr_convention == PCRE2_BSR_ANYCRLF ||
  3664. (fc != CHAR_VT && fc != CHAR_FF && fc != CHAR_NEL
  3665. #ifndef EBCDIC
  3666. && fc != 0x2028 && fc != 0x2029
  3667. #endif /* Not EBCDIC */
  3668. )))
  3669. break;
  3670. Feptr += len;
  3671. }
  3672. }
  3673. break;
  3674. case OP_NOT_HSPACE:
  3675. case OP_HSPACE:
  3676. for (i = Lmin; i < Lmax; i++)
  3677. {
  3678. BOOL gotspace;
  3679. int len = 1;
  3680. if (Feptr >= mb->end_subject)
  3681. {
  3682. SCHECK_PARTIAL();
  3683. break;
  3684. }
  3685. GETCHARLEN(fc, Feptr, len);
  3686. switch(fc)
  3687. {
  3688. HSPACE_CASES: gotspace = TRUE; break;
  3689. default: gotspace = FALSE; break;
  3690. }
  3691. if (gotspace == (Lctype == OP_NOT_HSPACE)) break;
  3692. Feptr += len;
  3693. }
  3694. break;
  3695. case OP_NOT_VSPACE:
  3696. case OP_VSPACE:
  3697. for (i = Lmin; i < Lmax; i++)
  3698. {
  3699. BOOL gotspace;
  3700. int len = 1;
  3701. if (Feptr >= mb->end_subject)
  3702. {
  3703. SCHECK_PARTIAL();
  3704. break;
  3705. }
  3706. GETCHARLEN(fc, Feptr, len);
  3707. switch(fc)
  3708. {
  3709. VSPACE_CASES: gotspace = TRUE; break;
  3710. default: gotspace = FALSE; break;
  3711. }
  3712. if (gotspace == (Lctype == OP_NOT_VSPACE)) break;
  3713. Feptr += len;
  3714. }
  3715. break;
  3716. case OP_NOT_DIGIT:
  3717. for (i = Lmin; i < Lmax; i++)
  3718. {
  3719. int len = 1;
  3720. if (Feptr >= mb->end_subject)
  3721. {
  3722. SCHECK_PARTIAL();
  3723. break;
  3724. }
  3725. GETCHARLEN(fc, Feptr, len);
  3726. if (fc < 256 && (mb->ctypes[fc] & ctype_digit) != 0) break;
  3727. Feptr+= len;
  3728. }
  3729. break;
  3730. case OP_DIGIT:
  3731. for (i = Lmin; i < Lmax; i++)
  3732. {
  3733. int len = 1;
  3734. if (Feptr >= mb->end_subject)
  3735. {
  3736. SCHECK_PARTIAL();
  3737. break;
  3738. }
  3739. GETCHARLEN(fc, Feptr, len);
  3740. if (fc >= 256 ||(mb->ctypes[fc] & ctype_digit) == 0) break;
  3741. Feptr+= len;
  3742. }
  3743. break;
  3744. case OP_NOT_WHITESPACE:
  3745. for (i = Lmin; i < Lmax; i++)
  3746. {
  3747. int len = 1;
  3748. if (Feptr >= mb->end_subject)
  3749. {
  3750. SCHECK_PARTIAL();
  3751. break;
  3752. }
  3753. GETCHARLEN(fc, Feptr, len);
  3754. if (fc < 256 && (mb->ctypes[fc] & ctype_space) != 0) break;
  3755. Feptr+= len;
  3756. }
  3757. break;
  3758. case OP_WHITESPACE:
  3759. for (i = Lmin; i < Lmax; i++)
  3760. {
  3761. int len = 1;
  3762. if (Feptr >= mb->end_subject)
  3763. {
  3764. SCHECK_PARTIAL();
  3765. break;
  3766. }
  3767. GETCHARLEN(fc, Feptr, len);
  3768. if (fc >= 256 ||(mb->ctypes[fc] & ctype_space) == 0) break;
  3769. Feptr+= len;
  3770. }
  3771. break;
  3772. case OP_NOT_WORDCHAR:
  3773. for (i = Lmin; i < Lmax; i++)
  3774. {
  3775. int len = 1;
  3776. if (Feptr >= mb->end_subject)
  3777. {
  3778. SCHECK_PARTIAL();
  3779. break;
  3780. }
  3781. GETCHARLEN(fc, Feptr, len);
  3782. if (fc < 256 && (mb->ctypes[fc] & ctype_word) != 0) break;
  3783. Feptr+= len;
  3784. }
  3785. break;
  3786. case OP_WORDCHAR:
  3787. for (i = Lmin; i < Lmax; i++)
  3788. {
  3789. int len = 1;
  3790. if (Feptr >= mb->end_subject)
  3791. {
  3792. SCHECK_PARTIAL();
  3793. break;
  3794. }
  3795. GETCHARLEN(fc, Feptr, len);
  3796. if (fc >= 256 || (mb->ctypes[fc] & ctype_word) == 0) break;
  3797. Feptr+= len;
  3798. }
  3799. break;
  3800. default:
  3801. return PCRE2_ERROR_INTERNAL;
  3802. }
  3803. if (reptype == REPTYPE_POS) continue; /* No backtracking */
  3804. /* After \C in UTF mode, Lstart_eptr might be in the middle of a
  3805. Unicode character. Use <= Lstart_eptr to ensure backtracking doesn't go
  3806. too far. */
  3807. for(;;)
  3808. {
  3809. if (Feptr <= Lstart_eptr) break;
  3810. RMATCH(Fecode, RM221);
  3811. if (rrc != MATCH_NOMATCH) RRETURN(rrc);
  3812. Feptr--;
  3813. BACKCHAR(Feptr);
  3814. if (Lctype == OP_ANYNL && Feptr > Lstart_eptr &&
  3815. UCHAR21(Feptr) == CHAR_NL && UCHAR21(Feptr - 1) == CHAR_CR)
  3816. Feptr--;
  3817. }
  3818. }
  3819. else
  3820. #endif /* SUPPORT_UNICODE */
  3821. /* Not UTF mode */
  3822. {
  3823. switch(Lctype)
  3824. {
  3825. case OP_ANY:
  3826. for (i = Lmin; i < Lmax; i++)
  3827. {
  3828. if (Feptr >= mb->end_subject)
  3829. {
  3830. SCHECK_PARTIAL();
  3831. break;
  3832. }
  3833. if (IS_NEWLINE(Feptr)) break;
  3834. if (mb->partial != 0 && /* Take care with CRLF partial */
  3835. Feptr + 1 >= mb->end_subject &&
  3836. NLBLOCK->nltype == NLTYPE_FIXED &&
  3837. NLBLOCK->nllen == 2 &&
  3838. *Feptr == NLBLOCK->nl[0])
  3839. {
  3840. mb->hitend = TRUE;
  3841. if (mb->partial > 1) return PCRE2_ERROR_PARTIAL;
  3842. }
  3843. Feptr++;
  3844. }
  3845. break;
  3846. case OP_ALLANY:
  3847. case OP_ANYBYTE:
  3848. fc = Lmax - Lmin;
  3849. if (fc > (uint32_t)(mb->end_subject - Feptr))
  3850. {
  3851. Feptr = mb->end_subject;
  3852. SCHECK_PARTIAL();
  3853. }
  3854. else Feptr += fc;
  3855. break;
  3856. case OP_ANYNL:
  3857. for (i = Lmin; i < Lmax; i++)
  3858. {
  3859. if (Feptr >= mb->end_subject)
  3860. {
  3861. SCHECK_PARTIAL();
  3862. break;
  3863. }
  3864. fc = *Feptr;
  3865. if (fc == CHAR_CR)
  3866. {
  3867. if (++Feptr >= mb->end_subject) break;
  3868. if (*Feptr == CHAR_LF) Feptr++;
  3869. }
  3870. else
  3871. {
  3872. if (fc != CHAR_LF && (mb->bsr_convention == PCRE2_BSR_ANYCRLF ||
  3873. (fc != CHAR_VT && fc != CHAR_FF && fc != CHAR_NEL
  3874. #if PCRE2_CODE_UNIT_WIDTH != 8
  3875. && fc != 0x2028 && fc != 0x2029
  3876. #endif
  3877. ))) break;
  3878. Feptr++;
  3879. }
  3880. }
  3881. break;
  3882. case OP_NOT_HSPACE:
  3883. for (i = Lmin; i < Lmax; i++)
  3884. {
  3885. if (Feptr >= mb->end_subject)
  3886. {
  3887. SCHECK_PARTIAL();
  3888. break;
  3889. }
  3890. switch(*Feptr)
  3891. {
  3892. default: Feptr++; break;
  3893. HSPACE_BYTE_CASES:
  3894. #if PCRE2_CODE_UNIT_WIDTH != 8
  3895. HSPACE_MULTIBYTE_CASES:
  3896. #endif
  3897. goto ENDLOOP00;
  3898. }
  3899. }
  3900. ENDLOOP00:
  3901. break;
  3902. case OP_HSPACE:
  3903. for (i = Lmin; i < Lmax; i++)
  3904. {
  3905. if (Feptr >= mb->end_subject)
  3906. {
  3907. SCHECK_PARTIAL();
  3908. break;
  3909. }
  3910. switch(*Feptr)
  3911. {
  3912. default: goto ENDLOOP01;
  3913. HSPACE_BYTE_CASES:
  3914. #if PCRE2_CODE_UNIT_WIDTH != 8
  3915. HSPACE_MULTIBYTE_CASES:
  3916. #endif
  3917. Feptr++; break;
  3918. }
  3919. }
  3920. ENDLOOP01:
  3921. break;
  3922. case OP_NOT_VSPACE:
  3923. for (i = Lmin; i < Lmax; i++)
  3924. {
  3925. if (Feptr >= mb->end_subject)
  3926. {
  3927. SCHECK_PARTIAL();
  3928. break;
  3929. }
  3930. switch(*Feptr)
  3931. {
  3932. default: Feptr++; break;
  3933. VSPACE_BYTE_CASES:
  3934. #if PCRE2_CODE_UNIT_WIDTH != 8
  3935. VSPACE_MULTIBYTE_CASES:
  3936. #endif
  3937. goto ENDLOOP02;
  3938. }
  3939. }
  3940. ENDLOOP02:
  3941. break;
  3942. case OP_VSPACE:
  3943. for (i = Lmin; i < Lmax; i++)
  3944. {
  3945. if (Feptr >= mb->end_subject)
  3946. {
  3947. SCHECK_PARTIAL();
  3948. break;
  3949. }
  3950. switch(*Feptr)
  3951. {
  3952. default: goto ENDLOOP03;
  3953. VSPACE_BYTE_CASES:
  3954. #if PCRE2_CODE_UNIT_WIDTH != 8
  3955. VSPACE_MULTIBYTE_CASES:
  3956. #endif
  3957. Feptr++; break;
  3958. }
  3959. }
  3960. ENDLOOP03:
  3961. break;
  3962. case OP_NOT_DIGIT:
  3963. for (i = Lmin; i < Lmax; i++)
  3964. {
  3965. if (Feptr >= mb->end_subject)
  3966. {
  3967. SCHECK_PARTIAL();
  3968. break;
  3969. }
  3970. if (MAX_255(*Feptr) && (mb->ctypes[*Feptr] & ctype_digit) != 0)
  3971. break;
  3972. Feptr++;
  3973. }
  3974. break;
  3975. case OP_DIGIT:
  3976. for (i = Lmin; i < Lmax; i++)
  3977. {
  3978. if (Feptr >= mb->end_subject)
  3979. {
  3980. SCHECK_PARTIAL();
  3981. break;
  3982. }
  3983. if (!MAX_255(*Feptr) || (mb->ctypes[*Feptr] & ctype_digit) == 0)
  3984. break;
  3985. Feptr++;
  3986. }
  3987. break;
  3988. case OP_NOT_WHITESPACE:
  3989. for (i = Lmin; i < Lmax; i++)
  3990. {
  3991. if (Feptr >= mb->end_subject)
  3992. {
  3993. SCHECK_PARTIAL();
  3994. break;
  3995. }
  3996. if (MAX_255(*Feptr) && (mb->ctypes[*Feptr] & ctype_space) != 0)
  3997. break;
  3998. Feptr++;
  3999. }
  4000. break;
  4001. case OP_WHITESPACE:
  4002. for (i = Lmin; i < Lmax; i++)
  4003. {
  4004. if (Feptr >= mb->end_subject)
  4005. {
  4006. SCHECK_PARTIAL();
  4007. break;
  4008. }
  4009. if (!MAX_255(*Feptr) || (mb->ctypes[*Feptr] & ctype_space) == 0)
  4010. break;
  4011. Feptr++;
  4012. }
  4013. break;
  4014. case OP_NOT_WORDCHAR:
  4015. for (i = Lmin; i < Lmax; i++)
  4016. {
  4017. if (Feptr >= mb->end_subject)
  4018. {
  4019. SCHECK_PARTIAL();
  4020. break;
  4021. }
  4022. if (MAX_255(*Feptr) && (mb->ctypes[*Feptr] & ctype_word) != 0)
  4023. break;
  4024. Feptr++;
  4025. }
  4026. break;
  4027. case OP_WORDCHAR:
  4028. for (i = Lmin; i < Lmax; i++)
  4029. {
  4030. if (Feptr >= mb->end_subject)
  4031. {
  4032. SCHECK_PARTIAL();
  4033. break;
  4034. }
  4035. if (!MAX_255(*Feptr) || (mb->ctypes[*Feptr] & ctype_word) == 0)
  4036. break;
  4037. Feptr++;
  4038. }
  4039. break;
  4040. default:
  4041. return PCRE2_ERROR_INTERNAL;
  4042. }
  4043. if (reptype == REPTYPE_POS) continue; /* No backtracking */
  4044. for (;;)
  4045. {
  4046. if (Feptr == Lstart_eptr) break;
  4047. RMATCH(Fecode, RM34);
  4048. if (rrc != MATCH_NOMATCH) RRETURN(rrc);
  4049. Feptr--;
  4050. if (Lctype == OP_ANYNL && Feptr > Lstart_eptr && *Feptr == CHAR_LF &&
  4051. Feptr[-1] == CHAR_CR) Feptr--;
  4052. }
  4053. }
  4054. }
  4055. break; /* End of repeat character type processing */
  4056. #undef Lstart_eptr
  4057. #undef Lmin
  4058. #undef Lmax
  4059. #undef Lctype
  4060. #undef Lpropvalue
  4061. /* ===================================================================== */
  4062. /* Match a back reference, possibly repeatedly. Look past the end of the
  4063. item to see if there is repeat information following. The OP_REF and
  4064. OP_REFI opcodes are used for a reference to a numbered group or to a
  4065. non-duplicated named group. For a duplicated named group, OP_DNREF and
  4066. OP_DNREFI are used. In this case we must scan the list of groups to which
  4067. the name refers, and use the first one that is set. */
  4068. #define Lmin F->temp_32[0]
  4069. #define Lmax F->temp_32[1]
  4070. #define Lcaseless F->temp_32[2]
  4071. #define Lstart F->temp_sptr[0]
  4072. #define Loffset F->temp_size
  4073. case OP_DNREF:
  4074. case OP_DNREFI:
  4075. Lcaseless = (Fop == OP_DNREFI);
  4076. {
  4077. int count = GET2(Fecode, 1+IMM2_SIZE);
  4078. PCRE2_SPTR slot = mb->name_table + GET2(Fecode, 1) * mb->name_entry_size;
  4079. Fecode += 1 + 2*IMM2_SIZE;
  4080. while (count-- > 0)
  4081. {
  4082. Loffset = (GET2(slot, 0) << 1) - 2;
  4083. if (Loffset < Foffset_top && Fovector[Loffset] != PCRE2_UNSET) break;
  4084. slot += mb->name_entry_size;
  4085. }
  4086. }
  4087. goto REF_REPEAT;
  4088. case OP_REF:
  4089. case OP_REFI:
  4090. Lcaseless = (Fop == OP_REFI);
  4091. Loffset = (GET2(Fecode, 1) << 1) - 2;
  4092. Fecode += 1 + IMM2_SIZE;
  4093. /* Set up for repetition, or handle the non-repeated case. The maximum and
  4094. minimum must be in the heap frame, but as they are short-term values, we
  4095. use temporary fields. */
  4096. REF_REPEAT:
  4097. switch (*Fecode)
  4098. {
  4099. case OP_CRSTAR:
  4100. case OP_CRMINSTAR:
  4101. case OP_CRPLUS:
  4102. case OP_CRMINPLUS:
  4103. case OP_CRQUERY:
  4104. case OP_CRMINQUERY:
  4105. fc = *Fecode++ - OP_CRSTAR;
  4106. Lmin = rep_min[fc];
  4107. Lmax = rep_max[fc];
  4108. reptype = rep_typ[fc];
  4109. break;
  4110. case OP_CRRANGE:
  4111. case OP_CRMINRANGE:
  4112. Lmin = GET2(Fecode, 1);
  4113. Lmax = GET2(Fecode, 1 + IMM2_SIZE);
  4114. reptype = rep_typ[*Fecode - OP_CRSTAR];
  4115. if (Lmax == 0) Lmax = UINT32_MAX; /* Max 0 => infinity */
  4116. Fecode += 1 + 2 * IMM2_SIZE;
  4117. break;
  4118. default: /* No repeat follows */
  4119. {
  4120. rrc = match_ref(Loffset, Lcaseless, F, mb, &length);
  4121. if (rrc != 0)
  4122. {
  4123. if (rrc > 0) Feptr = mb->end_subject; /* Partial match */
  4124. CHECK_PARTIAL();
  4125. RRETURN(MATCH_NOMATCH);
  4126. }
  4127. }
  4128. Feptr += length;
  4129. continue; /* With the main loop */
  4130. }
  4131. /* Handle repeated back references. If a set group has length zero, just
  4132. continue with the main loop, because it matches however many times. For an
  4133. unset reference, if the minimum is zero, we can also just continue. We can
  4134. also continue if PCRE2_MATCH_UNSET_BACKREF is set, because this makes an
  4135. unset group behave as a zero-length group. For any other unset cases, carrying
  4136. on will result in NOMATCH. */
  4137. if (Loffset < Foffset_top && Fovector[Loffset] != PCRE2_UNSET)
  4138. {
  4139. if (Fovector[Loffset] == Fovector[Loffset + 1]) continue;
  4140. }
  4141. else /* Group is not set */
  4142. {
  4143. if (Lmin == 0 || (mb->poptions & PCRE2_MATCH_UNSET_BACKREF) != 0)
  4144. continue;
  4145. }
  4146. /* First, ensure the minimum number of matches are present. */
  4147. for (i = 1; i <= Lmin; i++)
  4148. {
  4149. PCRE2_SIZE slength;
  4150. rrc = match_ref(Loffset, Lcaseless, F, mb, &slength);
  4151. if (rrc != 0)
  4152. {
  4153. if (rrc > 0) Feptr = mb->end_subject; /* Partial match */
  4154. CHECK_PARTIAL();
  4155. RRETURN(MATCH_NOMATCH);
  4156. }
  4157. Feptr += slength;
  4158. }
  4159. /* If min = max, we are done. They are not both allowed to be zero. */
  4160. if (Lmin == Lmax) continue;
  4161. /* If minimizing, keep trying and advancing the pointer. */
  4162. if (reptype == REPTYPE_MIN)
  4163. {
  4164. for (;;)
  4165. {
  4166. PCRE2_SIZE slength;
  4167. RMATCH(Fecode, RM20);
  4168. if (rrc != MATCH_NOMATCH) RRETURN(rrc);
  4169. if (Lmin++ >= Lmax) RRETURN(MATCH_NOMATCH);
  4170. rrc = match_ref(Loffset, Lcaseless, F, mb, &slength);
  4171. if (rrc != 0)
  4172. {
  4173. if (rrc > 0) Feptr = mb->end_subject; /* Partial match */
  4174. CHECK_PARTIAL();
  4175. RRETURN(MATCH_NOMATCH);
  4176. }
  4177. Feptr += slength;
  4178. }
  4179. /* Control never gets here */
  4180. }
  4181. /* If maximizing, find the longest string and work backwards, as long as
  4182. the matched lengths for each iteration are the same. */
  4183. else
  4184. {
  4185. BOOL samelengths = TRUE;
  4186. Lstart = Feptr; /* Starting position */
  4187. Flength = Fovector[Loffset+1] - Fovector[Loffset];
  4188. for (i = Lmin; i < Lmax; i++)
  4189. {
  4190. PCRE2_SIZE slength;
  4191. rrc = match_ref(Loffset, Lcaseless, F, mb, &slength);
  4192. if (rrc != 0)
  4193. {
  4194. /* Can't use CHECK_PARTIAL because we don't want to update Feptr in
  4195. the soft partial matching case. */
  4196. if (rrc > 0 && mb->partial != 0 &&
  4197. mb->end_subject > mb->start_used_ptr)
  4198. {
  4199. mb->hitend = TRUE;
  4200. if (mb->partial > 1) return PCRE2_ERROR_PARTIAL;
  4201. }
  4202. break;
  4203. }
  4204. if (slength != Flength) samelengths = FALSE;
  4205. Feptr += slength;
  4206. }
  4207. /* If the length matched for each repetition is the same as the length of
  4208. the captured group, we can easily work backwards. This is the normal
  4209. case. However, in caseless UTF-8 mode there are pairs of case-equivalent
  4210. characters whose lengths (in terms of code units) differ. This is very
  4211. rare, so we handle it by re-matching fewer and fewer times. */
  4212. if (samelengths)
  4213. {
  4214. while (Feptr >= Lstart)
  4215. {
  4216. RMATCH(Fecode, RM21);
  4217. if (rrc != MATCH_NOMATCH) RRETURN(rrc);
  4218. Feptr -= Flength;
  4219. }
  4220. }
  4221. /* The rare case of non-matching lengths. Re-scan the repetition for each
  4222. iteration. We know that match_ref() will succeed every time. */
  4223. else
  4224. {
  4225. Lmax = i;
  4226. for (;;)
  4227. {
  4228. RMATCH(Fecode, RM22);
  4229. if (rrc != MATCH_NOMATCH) RRETURN(rrc);
  4230. if (Feptr == Lstart) break; /* Failed after minimal repetition */
  4231. Feptr = Lstart;
  4232. Lmax--;
  4233. for (i = Lmin; i < Lmax; i++)
  4234. {
  4235. PCRE2_SIZE slength;
  4236. (void)match_ref(Loffset, Lcaseless, F, mb, &slength);
  4237. Feptr += slength;
  4238. }
  4239. }
  4240. }
  4241. RRETURN(MATCH_NOMATCH);
  4242. }
  4243. /* Control never gets here */
  4244. #undef Lcaseless
  4245. #undef Lmin
  4246. #undef Lmax
  4247. #undef Lstart
  4248. #undef Loffset
  4249. /* ========================================================================= */
  4250. /* Opcodes for the start of various parenthesized items */
  4251. /* ========================================================================= */
  4252. /* In all cases, if the result of RMATCH() is MATCH_THEN, check whether the
  4253. (*THEN) is within the current branch by comparing the address of OP_THEN
  4254. that is passed back with the end of the branch. If (*THEN) is within the
  4255. current branch, and the branch is one of two or more alternatives (it
  4256. either starts or ends with OP_ALT), we have reached the limit of THEN's
  4257. action, so convert the return code to NOMATCH, which will cause normal
  4258. backtracking to happen from now on. Otherwise, THEN is passed back to an
  4259. outer alternative. This implements Perl's treatment of parenthesized
  4260. groups, where a group not containing | does not affect the current
  4261. alternative, that is, (X) is NOT the same as (X|(*F)). */
  4262. /* ===================================================================== */
  4263. /* BRAZERO, BRAMINZERO and SKIPZERO occur just before a non-possessive
  4264. bracket group, indicating that it may occur zero times. It may repeat
  4265. infinitely, or not at all - i.e. it could be ()* or ()? or even (){0} in
  4266. the pattern. Brackets with fixed upper repeat limits are compiled as a
  4267. number of copies, with the optional ones preceded by BRAZERO or BRAMINZERO.
  4268. Possessive groups with possible zero repeats are preceded by BRAPOSZERO. */
  4269. #define Lnext_ecode F->temp_sptr[0]
  4270. case OP_BRAZERO:
  4271. Lnext_ecode = Fecode + 1;
  4272. RMATCH(Lnext_ecode, RM9);
  4273. if (rrc != MATCH_NOMATCH) RRETURN(rrc);
  4274. do Lnext_ecode += GET(Lnext_ecode, 1); while (*Lnext_ecode == OP_ALT);
  4275. Fecode = Lnext_ecode + 1 + LINK_SIZE;
  4276. break;
  4277. case OP_BRAMINZERO:
  4278. Lnext_ecode = Fecode + 1;
  4279. do Lnext_ecode += GET(Lnext_ecode, 1); while (*Lnext_ecode == OP_ALT);
  4280. RMATCH(Lnext_ecode + 1 + LINK_SIZE, RM10);
  4281. if (rrc != MATCH_NOMATCH) RRETURN(rrc);
  4282. Fecode++;
  4283. break;
  4284. #undef Lnext_ecode
  4285. case OP_SKIPZERO:
  4286. Fecode++;
  4287. do Fecode += GET(Fecode,1); while (*Fecode == OP_ALT);
  4288. Fecode += 1 + LINK_SIZE;
  4289. break;
  4290. /* ===================================================================== */
  4291. /* Handle possessive brackets with an unlimited repeat. The end of these
  4292. brackets will always be OP_KETRPOS, which returns MATCH_KETRPOS without
  4293. going further in the pattern. */
  4294. #define Lframe_type F->temp_32[0]
  4295. #define Lmatched_once F->temp_32[1]
  4296. #define Lzero_allowed F->temp_32[2]
  4297. #define Lstart_eptr F->temp_sptr[0]
  4298. #define Lstart_group F->temp_sptr[1]
  4299. case OP_BRAPOSZERO:
  4300. Lzero_allowed = TRUE; /* Zero repeat is allowed */
  4301. Fecode += 1;
  4302. if (*Fecode == OP_CBRAPOS || *Fecode == OP_SCBRAPOS)
  4303. goto POSSESSIVE_CAPTURE;
  4304. goto POSSESSIVE_NON_CAPTURE;
  4305. case OP_BRAPOS:
  4306. case OP_SBRAPOS:
  4307. Lzero_allowed = FALSE; /* Zero repeat not allowed */
  4308. POSSESSIVE_NON_CAPTURE:
  4309. Lframe_type = GF_NOCAPTURE; /* Remembered frame type */
  4310. goto POSSESSIVE_GROUP;
  4311. case OP_CBRAPOS:
  4312. case OP_SCBRAPOS:
  4313. Lzero_allowed = FALSE; /* Zero repeat not allowed */
  4314. POSSESSIVE_CAPTURE:
  4315. number = GET2(Fecode, 1+LINK_SIZE);
  4316. Lframe_type = GF_CAPTURE | number; /* Remembered frame type */
  4317. POSSESSIVE_GROUP:
  4318. Lmatched_once = FALSE; /* Never matched */
  4319. Lstart_group = Fecode; /* Start of this group */
  4320. for (;;)
  4321. {
  4322. Lstart_eptr = Feptr; /* Position at group start */
  4323. group_frame_type = Lframe_type;
  4324. RMATCH(Fecode + PRIV(OP_lengths)[*Fecode], RM8);
  4325. if (rrc == MATCH_KETRPOS)
  4326. {
  4327. Lmatched_once = TRUE; /* Matched at least once */
  4328. if (Feptr == Lstart_eptr) /* Empty match; skip to end */
  4329. {
  4330. do Fecode += GET(Fecode, 1); while (*Fecode == OP_ALT);
  4331. break;
  4332. }
  4333. Fecode = Lstart_group;
  4334. continue;
  4335. }
  4336. /* See comment above about handling THEN. */
  4337. if (rrc == MATCH_THEN)
  4338. {
  4339. PCRE2_SPTR next_ecode = Fecode + GET(Fecode,1);
  4340. if (mb->verb_ecode_ptr < next_ecode &&
  4341. (*Fecode == OP_ALT || *next_ecode == OP_ALT))
  4342. rrc = MATCH_NOMATCH;
  4343. }
  4344. if (rrc != MATCH_NOMATCH) RRETURN(rrc);
  4345. Fecode += GET(Fecode, 1);
  4346. if (*Fecode != OP_ALT) break;
  4347. }
  4348. /* Success if matched something or zero repeat allowed */
  4349. if (Lmatched_once || Lzero_allowed)
  4350. {
  4351. Fecode += 1 + LINK_SIZE;
  4352. break;
  4353. }
  4354. RRETURN(MATCH_NOMATCH);
  4355. #undef Lmatched_once
  4356. #undef Lzero_allowed
  4357. #undef Lframe_type
  4358. #undef Lstart_eptr
  4359. #undef Lstart_group
  4360. /* ===================================================================== */
  4361. /* Handle non-capturing brackets that cannot match an empty string. When we
  4362. get to the final alternative within the brackets, as long as there are no
  4363. THEN's in the pattern, we can optimize by not recording a new backtracking
  4364. point. (Ideally we should test for a THEN within this group, but we don't
  4365. have that information.) Don't do this if we are at the very top level,
  4366. however, because that would make handling assertions and once-only brackets
  4367. messier when there is nothing to go back to. */
  4368. #define Lframe_type F->temp_32[0] /* Set for all that use GROUPLOOP */
  4369. #define Lnext_branch F->temp_sptr[0] /* Used only in OP_BRA handling */
  4370. case OP_BRA:
  4371. if (mb->hasthen || Frdepth == 0)
  4372. {
  4373. Lframe_type = 0;
  4374. goto GROUPLOOP;
  4375. }
  4376. for (;;)
  4377. {
  4378. Lnext_branch = Fecode + GET(Fecode, 1);
  4379. if (*Lnext_branch != OP_ALT) break;
  4380. /* This is never the final branch. We do not need to test for MATCH_THEN
  4381. here because this code is not used when there is a THEN in the pattern. */
  4382. RMATCH(Fecode + PRIV(OP_lengths)[*Fecode], RM1);
  4383. if (rrc != MATCH_NOMATCH) RRETURN(rrc);
  4384. Fecode = Lnext_branch;
  4385. }
  4386. /* Hit the start of the final branch. Continue at this level. */
  4387. Fecode += PRIV(OP_lengths)[*Fecode];
  4388. break;
  4389. #undef Lnext_branch
  4390. /* ===================================================================== */
  4391. /* Handle a capturing bracket, other than those that are possessive with an
  4392. unlimited repeat. */
  4393. case OP_CBRA:
  4394. case OP_SCBRA:
  4395. Lframe_type = GF_CAPTURE | GET2(Fecode, 1+LINK_SIZE);
  4396. goto GROUPLOOP;
  4397. /* ===================================================================== */
  4398. /* Atomic groups and non-capturing brackets that can match an empty string
  4399. must record a backtracking point and also set up a chained frame. */
  4400. case OP_ONCE:
  4401. case OP_SBRA:
  4402. Lframe_type = GF_NOCAPTURE | Fop;
  4403. GROUPLOOP:
  4404. for (;;)
  4405. {
  4406. group_frame_type = Lframe_type;
  4407. RMATCH(Fecode + PRIV(OP_lengths)[*Fecode], RM2);
  4408. if (rrc == MATCH_THEN)
  4409. {
  4410. PCRE2_SPTR next_ecode = Fecode + GET(Fecode,1);
  4411. if (mb->verb_ecode_ptr < next_ecode &&
  4412. (*Fecode == OP_ALT || *next_ecode == OP_ALT))
  4413. rrc = MATCH_NOMATCH;
  4414. }
  4415. if (rrc != MATCH_NOMATCH) RRETURN(rrc);
  4416. Fecode += GET(Fecode, 1);
  4417. if (*Fecode != OP_ALT) RRETURN(MATCH_NOMATCH);
  4418. }
  4419. /* Control never reaches here. */
  4420. #undef Lframe_type
  4421. /* ===================================================================== */
  4422. /* Recursion either matches the current regex, or some subexpression. The
  4423. offset data is the offset to the starting bracket from the start of the
  4424. whole pattern. (This is so that it works from duplicated subpatterns.) */
  4425. #define Lframe_type F->temp_32[0]
  4426. #define Lstart_branch F->temp_sptr[0]
  4427. case OP_RECURSE:
  4428. bracode = mb->start_code + GET(Fecode, 1);
  4429. number = (bracode == mb->start_code)? 0 : GET2(bracode, 1 + LINK_SIZE);
  4430. /* If we are already in a recursion, check for repeating the same one
  4431. without advancing the subject pointer. This should catch convoluted mutual
  4432. recursions. (Some simple cases are caught at compile time.) */
  4433. if (Fcurrent_recurse != RECURSE_UNSET)
  4434. {
  4435. offset = Flast_group_offset;
  4436. while (offset != PCRE2_UNSET)
  4437. {
  4438. N = (heapframe *)((char *)mb->match_frames + offset);
  4439. P = (heapframe *)((char *)N - frame_size);
  4440. if (N->group_frame_type == (GF_RECURSE | number))
  4441. {
  4442. if (Feptr == P->eptr) return PCRE2_ERROR_RECURSELOOP;
  4443. break;
  4444. }
  4445. offset = P->last_group_offset;
  4446. }
  4447. }
  4448. /* Now run the recursion, branch by branch. */
  4449. Lstart_branch = bracode;
  4450. Lframe_type = GF_RECURSE | number;
  4451. for (;;)
  4452. {
  4453. PCRE2_SPTR next_ecode;
  4454. group_frame_type = Lframe_type;
  4455. RMATCH(Lstart_branch + PRIV(OP_lengths)[*Lstart_branch], RM11);
  4456. next_ecode = Lstart_branch + GET(Lstart_branch,1);
  4457. /* Handle backtracking verbs, which are defined in a range that can
  4458. easily be tested for. PCRE does not allow THEN, SKIP, PRUNE or COMMIT to
  4459. escape beyond a recursion; they cause a NOMATCH for the entire recursion.
  4460. When one of these verbs triggers, the current recursion group number is
  4461. recorded. If it matches the recursion we are processing, the verb
  4462. happened within the recursion and we must deal with it. Otherwise it must
  4463. have happened after the recursion completed, and so has to be passed
  4464. back. See comment above about handling THEN. */
  4465. if (rrc >= MATCH_BACKTRACK_MIN && rrc <= MATCH_BACKTRACK_MAX &&
  4466. mb->verb_current_recurse == (Lframe_type ^ GF_RECURSE))
  4467. {
  4468. if (rrc == MATCH_THEN && mb->verb_ecode_ptr < next_ecode &&
  4469. (*Lstart_branch == OP_ALT || *next_ecode == OP_ALT))
  4470. rrc = MATCH_NOMATCH;
  4471. else RRETURN(MATCH_NOMATCH);
  4472. }
  4473. /* Note that carrying on after (*ACCEPT) in a recursion is handled in the
  4474. OP_ACCEPT code. Nothing needs to be done here. */
  4475. if (rrc != MATCH_NOMATCH) RRETURN(rrc);
  4476. Lstart_branch = next_ecode;
  4477. if (*Lstart_branch != OP_ALT) RRETURN(MATCH_NOMATCH);
  4478. }
  4479. /* Control never reaches here. */
  4480. #undef Lframe_type
  4481. #undef Lstart_branch
  4482. /* ===================================================================== */
  4483. /* Positive assertions are like other groups except that PCRE doesn't allow
  4484. the effect of (*THEN) to escape beyond an assertion; it is therefore
  4485. treated as NOMATCH. (*ACCEPT) is treated as successful assertion, with its
  4486. captures retained. Any other return is an error. */
  4487. #define Lframe_type F->temp_32[0]
  4488. case OP_ASSERT:
  4489. case OP_ASSERTBACK:
  4490. Lframe_type = GF_NOCAPTURE | Fop;
  4491. for (;;)
  4492. {
  4493. group_frame_type = Lframe_type;
  4494. RMATCH(Fecode + PRIV(OP_lengths)[*Fecode], RM3);
  4495. if (rrc == MATCH_ACCEPT)
  4496. {
  4497. memcpy(Fovector,
  4498. (char *)assert_accept_frame + offsetof(heapframe, ovector),
  4499. assert_accept_frame->offset_top * sizeof(PCRE2_SIZE));
  4500. Foffset_top = assert_accept_frame->offset_top;
  4501. break;
  4502. }
  4503. if (rrc != MATCH_NOMATCH && rrc != MATCH_THEN) RRETURN(rrc);
  4504. Fecode += GET(Fecode, 1);
  4505. if (*Fecode != OP_ALT) RRETURN(MATCH_NOMATCH);
  4506. }
  4507. do Fecode += GET(Fecode, 1); while (*Fecode == OP_ALT);
  4508. Fecode += 1 + LINK_SIZE;
  4509. break;
  4510. #undef Lframe_type
  4511. /* ===================================================================== */
  4512. /* Handle negative assertions. Loop for each non-matching branch as for
  4513. positive assertions. */
  4514. #define Lframe_type F->temp_32[0]
  4515. case OP_ASSERT_NOT:
  4516. case OP_ASSERTBACK_NOT:
  4517. Lframe_type = GF_NOCAPTURE | Fop;
  4518. for (;;)
  4519. {
  4520. group_frame_type = Lframe_type;
  4521. RMATCH(Fecode + PRIV(OP_lengths)[*Fecode], RM4);
  4522. switch(rrc)
  4523. {
  4524. case MATCH_ACCEPT: /* Assertion matched, therefore it fails. */
  4525. case MATCH_MATCH:
  4526. RRETURN (MATCH_NOMATCH);
  4527. case MATCH_NOMATCH: /* Branch failed, try next if present. */
  4528. case MATCH_THEN:
  4529. Fecode += GET(Fecode, 1);
  4530. if (*Fecode != OP_ALT) goto ASSERT_NOT_FAILED;
  4531. break;
  4532. case MATCH_COMMIT: /* Assertion forced to fail, therefore continue. */
  4533. case MATCH_SKIP:
  4534. case MATCH_PRUNE:
  4535. do Fecode += GET(Fecode, 1); while (*Fecode == OP_ALT);
  4536. goto ASSERT_NOT_FAILED;
  4537. default: /* Pass back any other return */
  4538. RRETURN(rrc);
  4539. }
  4540. }
  4541. /* None of the branches have matched or there was a backtrack to (*COMMIT),
  4542. (*SKIP), (*PRUNE), or (*THEN) in the last branch. This is success for a
  4543. negative assertion, so carry on. */
  4544. ASSERT_NOT_FAILED:
  4545. Fecode += 1 + LINK_SIZE;
  4546. break;
  4547. #undef Lframe_type
  4548. /* ===================================================================== */
  4549. /* The callout item calls an external function, if one is provided, passing
  4550. details of the match so far. This is mainly for debugging, though the
  4551. function is able to force a failure. */
  4552. case OP_CALLOUT:
  4553. case OP_CALLOUT_STR:
  4554. rrc = do_callout(F, mb, &length);
  4555. if (rrc > 0) RRETURN(MATCH_NOMATCH);
  4556. if (rrc < 0) RRETURN(rrc);
  4557. Fecode += length;
  4558. break;
  4559. /* ===================================================================== */
  4560. /* Conditional group: compilation checked that there are no more than two
  4561. branches. If the condition is false, skipping the first branch takes us
  4562. past the end of the item if there is only one branch, but that's exactly
  4563. what we want. */
  4564. case OP_COND:
  4565. case OP_SCOND:
  4566. /* The variable Flength will be added to Fecode when the condition is
  4567. false, to get to the second branch. Setting it to the offset to the ALT or
  4568. KET, then incrementing Fecode achieves this effect. However, if the second
  4569. branch is non-existent, we must point to the KET so that the end of the
  4570. group is correctly processed. We now have Fecode pointing to the condition
  4571. or callout. */
  4572. Flength = GET(Fecode, 1); /* Offset to the second branch */
  4573. if (Fecode[Flength] != OP_ALT) Flength -= 1 + LINK_SIZE;
  4574. Fecode += 1 + LINK_SIZE; /* From this opcode */
  4575. /* Because of the way auto-callout works during compile, a callout item is
  4576. inserted between OP_COND and an assertion condition. Such a callout can
  4577. also be inserted manually. */
  4578. if (*Fecode == OP_CALLOUT || *Fecode == OP_CALLOUT_STR)
  4579. {
  4580. rrc = do_callout(F, mb, &length);
  4581. if (rrc > 0) RRETURN(MATCH_NOMATCH);
  4582. if (rrc < 0) RRETURN(rrc);
  4583. /* Advance Fecode past the callout, so it now points to the condition. We
  4584. must adjust Flength so that the value of Fecode+Flength is unchanged. */
  4585. Fecode += length;
  4586. Flength -= length;
  4587. }
  4588. /* Test the various possible conditions */
  4589. condition = FALSE;
  4590. switch(*Fecode)
  4591. {
  4592. case OP_RREF: /* Group recursion test */
  4593. if (Fcurrent_recurse != RECURSE_UNSET)
  4594. {
  4595. number = GET2(Fecode, 1);
  4596. condition = (number == RREF_ANY || number == Fcurrent_recurse);
  4597. }
  4598. break;
  4599. case OP_DNRREF: /* Duplicate named group recursion test */
  4600. if (Fcurrent_recurse != RECURSE_UNSET)
  4601. {
  4602. int count = GET2(Fecode, 1 + IMM2_SIZE);
  4603. PCRE2_SPTR slot = mb->name_table + GET2(Fecode, 1) * mb->name_entry_size;
  4604. while (count-- > 0)
  4605. {
  4606. number = GET2(slot, 0);
  4607. condition = number == Fcurrent_recurse;
  4608. if (condition) break;
  4609. slot += mb->name_entry_size;
  4610. }
  4611. }
  4612. break;
  4613. case OP_CREF: /* Numbered group used test */
  4614. offset = (GET2(Fecode, 1) << 1) - 2; /* Doubled ref number */
  4615. condition = offset < Foffset_top && Fovector[offset] != PCRE2_UNSET;
  4616. break;
  4617. case OP_DNCREF: /* Duplicate named group used test */
  4618. {
  4619. int count = GET2(Fecode, 1 + IMM2_SIZE);
  4620. PCRE2_SPTR slot = mb->name_table + GET2(Fecode, 1) * mb->name_entry_size;
  4621. while (count-- > 0)
  4622. {
  4623. offset = (GET2(slot, 0) << 1) - 2;
  4624. condition = offset < Foffset_top && Fovector[offset] != PCRE2_UNSET;
  4625. if (condition) break;
  4626. slot += mb->name_entry_size;
  4627. }
  4628. }
  4629. break;
  4630. case OP_FALSE:
  4631. case OP_FAIL: /* The assertion (?!) becomes OP_FAIL */
  4632. break;
  4633. case OP_TRUE:
  4634. condition = TRUE;
  4635. break;
  4636. /* The condition is an assertion. Run code similar to the assertion code
  4637. above. */
  4638. #define Lpositive F->temp_32[0]
  4639. #define Lstart_branch F->temp_sptr[0]
  4640. default:
  4641. Lpositive = (*Fecode == OP_ASSERT || *Fecode == OP_ASSERTBACK);
  4642. Lstart_branch = Fecode;
  4643. for (;;)
  4644. {
  4645. group_frame_type = GF_CONDASSERT | *Fecode;
  4646. RMATCH(Lstart_branch + PRIV(OP_lengths)[*Lstart_branch], RM5);
  4647. switch(rrc)
  4648. {
  4649. case MATCH_ACCEPT: /* Save captures */
  4650. memcpy(Fovector,
  4651. (char *)assert_accept_frame + offsetof(heapframe, ovector),
  4652. assert_accept_frame->offset_top * sizeof(PCRE2_SIZE));
  4653. Foffset_top = assert_accept_frame->offset_top;
  4654. /* Fall through */
  4655. /* In the case of a match, the captures have already been put into
  4656. the current frame. */
  4657. case MATCH_MATCH:
  4658. condition = Lpositive; /* TRUE for positive assertion */
  4659. break;
  4660. /* PCRE doesn't allow the effect of (*THEN) to escape beyond an
  4661. assertion; it is therefore always treated as NOMATCH. */
  4662. case MATCH_NOMATCH:
  4663. case MATCH_THEN:
  4664. Lstart_branch += GET(Lstart_branch, 1);
  4665. if (*Lstart_branch == OP_ALT) continue; /* Try next branch */
  4666. condition = !Lpositive; /* TRUE for negative assertion */
  4667. break;
  4668. /* These force no match without checking other branches. */
  4669. case MATCH_COMMIT:
  4670. case MATCH_SKIP:
  4671. case MATCH_PRUNE:
  4672. condition = !Lpositive;
  4673. break;
  4674. default:
  4675. RRETURN(rrc);
  4676. }
  4677. break; /* Out of the branch loop */
  4678. }
  4679. /* If the condition is true, find the end of the assertion so that
  4680. advancing past it gets us to the start of the first branch. */
  4681. if (condition)
  4682. {
  4683. do Fecode += GET(Fecode, 1); while (*Fecode == OP_ALT);
  4684. }
  4685. break; /* End of assertion condition */
  4686. }
  4687. #undef Lpositive
  4688. #undef Lstart_branch
  4689. /* Choose branch according to the condition. */
  4690. Fecode += condition? PRIV(OP_lengths)[*Fecode] : Flength;
  4691. /* If the opcode is OP_SCOND it means we are at a repeated conditional
  4692. group that might match an empty string. We must therefore descend a level
  4693. so that the start is remembered for checking. For OP_COND we can just
  4694. continue at this level. */
  4695. if (Fop == OP_SCOND)
  4696. {
  4697. group_frame_type = GF_NOCAPTURE | Fop;
  4698. RMATCH(Fecode, RM35);
  4699. RRETURN(rrc);
  4700. }
  4701. break;
  4702. /* ========================================================================= */
  4703. /* End of start of parenthesis opcodes */
  4704. /* ========================================================================= */
  4705. /* ===================================================================== */
  4706. /* Move the subject pointer back. This occurs only at the start of each
  4707. branch of a lookbehind assertion. If we are too close to the start to move
  4708. back, fail. When working with UTF-8 we move back a number of characters,
  4709. not bytes. */
  4710. case OP_REVERSE:
  4711. number = GET(Fecode, 1);
  4712. #ifdef SUPPORT_UNICODE
  4713. if (utf)
  4714. {
  4715. while (number-- > 0)
  4716. {
  4717. if (Feptr <= mb->start_subject) RRETURN(MATCH_NOMATCH);
  4718. Feptr--;
  4719. BACKCHAR(Feptr);
  4720. }
  4721. }
  4722. else
  4723. #endif
  4724. /* No UTF-8 support, or not in UTF-8 mode: count is byte count */
  4725. {
  4726. if ((ptrdiff_t)number > Feptr - mb->start_subject) RRETURN(MATCH_NOMATCH);
  4727. Feptr -= number;
  4728. }
  4729. /* Save the earliest consulted character, then skip to next op code */
  4730. if (Feptr < mb->start_used_ptr) mb->start_used_ptr = Feptr;
  4731. Fecode += 1 + LINK_SIZE;
  4732. break;
  4733. /* ===================================================================== */
  4734. /* An alternation is the end of a branch; scan along to find the end of the
  4735. bracketed group. */
  4736. case OP_ALT:
  4737. do Fecode += GET(Fecode,1); while (*Fecode == OP_ALT);
  4738. break;
  4739. /* ===================================================================== */
  4740. /* The end of a parenthesized group. For all but OP_BRA and OP_COND, the
  4741. starting frame was added to the chained frames in order to remember the
  4742. starting subject position for the group. */
  4743. case OP_KET:
  4744. case OP_KETRMIN:
  4745. case OP_KETRMAX:
  4746. case OP_KETRPOS:
  4747. bracode = Fecode - GET(Fecode, 1);
  4748. /* Point N to the frame at the start of the most recent group.
  4749. Remember the subject pointer at the start of the group. */
  4750. if (*bracode != OP_BRA && *bracode != OP_COND)
  4751. {
  4752. N = (heapframe *)((char *)mb->match_frames + Flast_group_offset);
  4753. P = (heapframe *)((char *)N - frame_size);
  4754. Flast_group_offset = P->last_group_offset;
  4755. #ifdef DEBUG_SHOW_RMATCH
  4756. fprintf(stderr, "++ KET for frame=%d type=%x prev char offset=%lu\n",
  4757. N->rdepth, N->group_frame_type,
  4758. (char *)P->eptr - (char *)mb->start_subject);
  4759. #endif
  4760. /* If we are at the end of an assertion that is a condition, return a
  4761. match, discarding any intermediate backtracking points. Copy back the
  4762. captures into the frame before N so that they are set on return. Doing
  4763. this for all assertions, both positive and negative, seems to match what
  4764. Perl does. */
  4765. if (GF_IDMASK(N->group_frame_type) == GF_CONDASSERT)
  4766. {
  4767. memcpy((char *)P + offsetof(heapframe, ovector), Fovector,
  4768. Foffset_top * sizeof(PCRE2_SIZE));
  4769. P->offset_top = Foffset_top;
  4770. Fback_frame = (char *)F - (char *)P;
  4771. RRETURN(MATCH_MATCH);
  4772. }
  4773. }
  4774. else P = NULL; /* Indicates starting frame not recorded */
  4775. /* The group was not a conditional assertion. */
  4776. switch (*bracode)
  4777. {
  4778. case OP_BRA: /* No need to do anything for these */
  4779. case OP_COND:
  4780. case OP_SCOND:
  4781. break;
  4782. /* Positive assertions are like OP_ONCE, except that in addition the
  4783. subject pointer must be put back to where it was at the start of the
  4784. assertion. */
  4785. case OP_ASSERT:
  4786. case OP_ASSERTBACK:
  4787. if (Feptr > mb->last_used_ptr) mb->last_used_ptr = Feptr;
  4788. Feptr = P->eptr;
  4789. /* Fall through */
  4790. /* For an atomic group, discard internal backtracking points. We must
  4791. also ensure that any remaining branches within the top-level of the group
  4792. are not tried. Do this by adjusting the code pointer within the backtrack
  4793. frame so that it points to the final branch. */
  4794. case OP_ONCE:
  4795. Fback_frame = ((char *)F - (char *)P) + frame_size;
  4796. for (;;)
  4797. {
  4798. uint32_t y = GET(P->ecode,1);
  4799. if ((P->ecode)[y] != OP_ALT) break;
  4800. P->ecode += y;
  4801. }
  4802. break;
  4803. /* A matching negative assertion returns MATCH, which is turned into
  4804. NOMATCH at the assertion level. */
  4805. case OP_ASSERT_NOT:
  4806. case OP_ASSERTBACK_NOT:
  4807. RRETURN(MATCH_MATCH);
  4808. /* Whole-pattern recursion is coded as a recurse into group 0, so it
  4809. won't be picked up here. Instead, we catch it when the OP_END is reached.
  4810. Other recursion is handled here. */
  4811. case OP_CBRA:
  4812. case OP_CBRAPOS:
  4813. case OP_SCBRA:
  4814. case OP_SCBRAPOS:
  4815. number = GET2(bracode, 1+LINK_SIZE);
  4816. /* Handle a recursively called group. We reinstate the previous set of
  4817. captures and then carry on after the recursion call. */
  4818. if (Fcurrent_recurse == number)
  4819. {
  4820. P = (heapframe *)((char *)N - frame_size);
  4821. memcpy((char *)F + offsetof(heapframe, ovector), P->ovector,
  4822. P->offset_top * sizeof(PCRE2_SIZE));
  4823. Foffset_top = P->offset_top;
  4824. Fcapture_last = P->capture_last;
  4825. Fcurrent_recurse = P->current_recurse;
  4826. Fecode = P->ecode + 1 + LINK_SIZE;
  4827. continue; /* With next opcode */
  4828. }
  4829. /* Deal with actual capturing. */
  4830. offset = (number << 1) - 2;
  4831. Fcapture_last = number;
  4832. Fovector[offset] = P->eptr - mb->start_subject;
  4833. Fovector[offset+1] = Feptr - mb->start_subject;
  4834. if (offset >= Foffset_top) Foffset_top = offset + 2;
  4835. break;
  4836. } /* End actions relating to the starting opcode */
  4837. /* OP_KETRPOS is a possessive repeating ket. Remember the current position,
  4838. and return the MATCH_KETRPOS. This makes it possible to do the repeats one
  4839. at a time from the outer level. This must precede the empty string test -
  4840. in this case that test is done at the outer level. */
  4841. if (*Fecode == OP_KETRPOS)
  4842. {
  4843. memcpy((char *)P + offsetof(heapframe, eptr),
  4844. (char *)F + offsetof(heapframe, eptr),
  4845. frame_copy_size);
  4846. RRETURN(MATCH_KETRPOS);
  4847. }
  4848. /* Handle the different kinds of closing brackets. A non-repeating ket
  4849. needs no special action, just continuing at this level. This also happens
  4850. for the repeating kets if the group matched no characters, in order to
  4851. forcibly break infinite loops. Otherwise, the repeating kets try the rest
  4852. of the pattern or restart from the preceding bracket, in the appropriate
  4853. order. */
  4854. if (Fop != OP_KET && (P == NULL || Feptr != P->eptr))
  4855. {
  4856. if (Fop == OP_KETRMIN)
  4857. {
  4858. RMATCH(Fecode + 1 + LINK_SIZE, RM6);
  4859. if (rrc != MATCH_NOMATCH) RRETURN(rrc);
  4860. Fecode -= GET(Fecode, 1);
  4861. break; /* End of ket processing */
  4862. }
  4863. /* Repeat the maximum number of times (KETRMAX) */
  4864. RMATCH(bracode, RM7);
  4865. if (rrc != MATCH_NOMATCH) RRETURN(rrc);
  4866. }
  4867. /* Carry on at this level for a non-repeating ket, or after matching an
  4868. empty string, or after repeating for a maximum number of times. */
  4869. Fecode += 1 + LINK_SIZE;
  4870. break;
  4871. /* ===================================================================== */
  4872. /* Start and end of line assertions, not multiline mode. */
  4873. case OP_CIRC: /* Start of line, unless PCRE2_NOTBOL is set. */
  4874. if (Feptr != mb->start_subject || (mb->moptions & PCRE2_NOTBOL) != 0)
  4875. RRETURN(MATCH_NOMATCH);
  4876. Fecode++;
  4877. break;
  4878. case OP_SOD: /* Unconditional start of subject */
  4879. if (Feptr != mb->start_subject) RRETURN(MATCH_NOMATCH);
  4880. Fecode++;
  4881. break;
  4882. /* When PCRE2_NOTEOL is unset, assert before the subject end, or a
  4883. terminating newline unless PCRE2_DOLLAR_ENDONLY is set. */
  4884. case OP_DOLL:
  4885. if ((mb->moptions & PCRE2_NOTEOL) != 0) RRETURN(MATCH_NOMATCH);
  4886. if ((mb->poptions & PCRE2_DOLLAR_ENDONLY) == 0) goto ASSERT_NL_OR_EOS;
  4887. /* Fall through */
  4888. /* Unconditional end of subject assertion (\z) */
  4889. case OP_EOD:
  4890. if (Feptr < mb->end_subject) RRETURN(MATCH_NOMATCH);
  4891. SCHECK_PARTIAL();
  4892. Fecode++;
  4893. break;
  4894. /* End of subject or ending \n assertion (\Z) */
  4895. case OP_EODN:
  4896. ASSERT_NL_OR_EOS:
  4897. if (Feptr < mb->end_subject &&
  4898. (!IS_NEWLINE(Feptr) || Feptr != mb->end_subject - mb->nllen))
  4899. {
  4900. if (mb->partial != 0 &&
  4901. Feptr + 1 >= mb->end_subject &&
  4902. NLBLOCK->nltype == NLTYPE_FIXED &&
  4903. NLBLOCK->nllen == 2 &&
  4904. UCHAR21TEST(Feptr) == NLBLOCK->nl[0])
  4905. {
  4906. mb->hitend = TRUE;
  4907. if (mb->partial > 1) return PCRE2_ERROR_PARTIAL;
  4908. }
  4909. RRETURN(MATCH_NOMATCH);
  4910. }
  4911. /* Either at end of string or \n before end. */
  4912. SCHECK_PARTIAL();
  4913. Fecode++;
  4914. break;
  4915. /* ===================================================================== */
  4916. /* Start and end of line assertions, multiline mode. */
  4917. /* Start of subject unless notbol, or after any newline except for one at
  4918. the very end, unless PCRE2_ALT_CIRCUMFLEX is set. */
  4919. case OP_CIRCM:
  4920. if ((mb->moptions & PCRE2_NOTBOL) != 0 && Feptr == mb->start_subject)
  4921. RRETURN(MATCH_NOMATCH);
  4922. if (Feptr != mb->start_subject &&
  4923. ((Feptr == mb->end_subject &&
  4924. (mb->poptions & PCRE2_ALT_CIRCUMFLEX) == 0) ||
  4925. !WAS_NEWLINE(Feptr)))
  4926. RRETURN(MATCH_NOMATCH);
  4927. Fecode++;
  4928. break;
  4929. /* Assert before any newline, or before end of subject unless noteol is
  4930. set. */
  4931. case OP_DOLLM:
  4932. if (Feptr < mb->end_subject)
  4933. {
  4934. if (!IS_NEWLINE(Feptr))
  4935. {
  4936. if (mb->partial != 0 &&
  4937. Feptr + 1 >= mb->end_subject &&
  4938. NLBLOCK->nltype == NLTYPE_FIXED &&
  4939. NLBLOCK->nllen == 2 &&
  4940. UCHAR21TEST(Feptr) == NLBLOCK->nl[0])
  4941. {
  4942. mb->hitend = TRUE;
  4943. if (mb->partial > 1) return PCRE2_ERROR_PARTIAL;
  4944. }
  4945. RRETURN(MATCH_NOMATCH);
  4946. }
  4947. }
  4948. else
  4949. {
  4950. if ((mb->moptions & PCRE2_NOTEOL) != 0) RRETURN(MATCH_NOMATCH);
  4951. SCHECK_PARTIAL();
  4952. }
  4953. Fecode++;
  4954. break;
  4955. /* ===================================================================== */
  4956. /* Start of match assertion */
  4957. case OP_SOM:
  4958. if (Feptr != mb->start_subject + mb->start_offset) RRETURN(MATCH_NOMATCH);
  4959. Fecode++;
  4960. break;
  4961. /* ===================================================================== */
  4962. /* Reset the start of match point */
  4963. case OP_SET_SOM:
  4964. Fstart_match = Feptr;
  4965. Fecode++;
  4966. break;
  4967. /* ===================================================================== */
  4968. /* Word boundary assertions. Find out if the previous and current
  4969. characters are "word" characters. It takes a bit more work in UTF mode.
  4970. Characters > 255 are assumed to be "non-word" characters when PCRE2_UCP is
  4971. not set. When it is set, use Unicode properties if available, even when not
  4972. in UTF mode. Remember the earliest and latest consulted characters. */
  4973. case OP_NOT_WORD_BOUNDARY:
  4974. case OP_WORD_BOUNDARY:
  4975. if (Feptr == mb->start_subject) prev_is_word = FALSE; else
  4976. {
  4977. PCRE2_SPTR lastptr = Feptr - 1;
  4978. #ifdef SUPPORT_UNICODE
  4979. if (utf)
  4980. {
  4981. BACKCHAR(lastptr);
  4982. GETCHAR(fc, lastptr);
  4983. }
  4984. else
  4985. #endif /* SUPPORT_UNICODE */
  4986. fc = *lastptr;
  4987. if (lastptr < mb->start_used_ptr) mb->start_used_ptr = lastptr;
  4988. #ifdef SUPPORT_UNICODE
  4989. if ((mb->poptions & PCRE2_UCP) != 0)
  4990. {
  4991. if (fc == '_') prev_is_word = TRUE; else
  4992. {
  4993. int cat = UCD_CATEGORY(fc);
  4994. prev_is_word = (cat == ucp_L || cat == ucp_N);
  4995. }
  4996. }
  4997. else
  4998. #endif /* SUPPORT_UNICODE */
  4999. prev_is_word = CHMAX_255(fc) && (mb->ctypes[fc] & ctype_word) != 0;
  5000. }
  5001. /* Get status of next character */
  5002. if (Feptr >= mb->end_subject)
  5003. {
  5004. SCHECK_PARTIAL();
  5005. cur_is_word = FALSE;
  5006. }
  5007. else
  5008. {
  5009. PCRE2_SPTR nextptr = Feptr + 1;
  5010. #ifdef SUPPORT_UNICODE
  5011. if (utf)
  5012. {
  5013. FORWARDCHARTEST(nextptr, mb->end_subject);
  5014. GETCHAR(fc, Feptr);
  5015. }
  5016. else
  5017. #endif /* SUPPORT_UNICODE */
  5018. fc = *Feptr;
  5019. if (nextptr > mb->last_used_ptr) mb->last_used_ptr = nextptr;
  5020. #ifdef SUPPORT_UNICODE
  5021. if ((mb->poptions & PCRE2_UCP) != 0)
  5022. {
  5023. if (fc == '_') cur_is_word = TRUE; else
  5024. {
  5025. int cat = UCD_CATEGORY(fc);
  5026. cur_is_word = (cat == ucp_L || cat == ucp_N);
  5027. }
  5028. }
  5029. else
  5030. #endif /* SUPPORT_UNICODE */
  5031. cur_is_word = CHMAX_255(fc) && (mb->ctypes[fc] & ctype_word) != 0;
  5032. }
  5033. /* Now see if the situation is what we want */
  5034. if ((*Fecode++ == OP_WORD_BOUNDARY)?
  5035. cur_is_word == prev_is_word : cur_is_word != prev_is_word)
  5036. RRETURN(MATCH_NOMATCH);
  5037. break;
  5038. /* ===================================================================== */
  5039. /* Backtracking (*VERB)s, with and without arguments. Note that if the
  5040. pattern is successfully matched, we do not come back from RMATCH. */
  5041. case OP_MARK:
  5042. Fmark = mb->nomatch_mark = Fecode + 2;
  5043. RMATCH(Fecode + PRIV(OP_lengths)[*Fecode] + Fecode[1], RM12);
  5044. /* A return of MATCH_SKIP_ARG means that matching failed at SKIP with an
  5045. argument, and we must check whether that argument matches this MARK's
  5046. argument. It is passed back in mb->verb_skip_ptr. If it does match, we
  5047. return MATCH_SKIP with mb->verb_skip_ptr now pointing to the subject
  5048. position that corresponds to this mark. Otherwise, pass back the return
  5049. code unaltered. */
  5050. if (rrc == MATCH_SKIP_ARG &&
  5051. PRIV(strcmp)(Fecode + 2, mb->verb_skip_ptr) == 0)
  5052. {
  5053. mb->verb_skip_ptr = Feptr; /* Pass back current position */
  5054. RRETURN(MATCH_SKIP);
  5055. }
  5056. RRETURN(rrc);
  5057. case OP_FAIL:
  5058. RRETURN(MATCH_NOMATCH);
  5059. /* Record the current recursing group number in mb->verb_current_recurse
  5060. when a backtracking return such as MATCH_COMMIT is given. This enables the
  5061. recurse processing to catch verbs from within the recursion. */
  5062. case OP_COMMIT:
  5063. RMATCH(Fecode + PRIV(OP_lengths)[*Fecode], RM13);
  5064. if (rrc != MATCH_NOMATCH) RRETURN(rrc);
  5065. mb->verb_current_recurse = Fcurrent_recurse;
  5066. RRETURN(MATCH_COMMIT);
  5067. case OP_PRUNE:
  5068. RMATCH(Fecode + PRIV(OP_lengths)[*Fecode], RM14);
  5069. if (rrc != MATCH_NOMATCH) RRETURN(rrc);
  5070. mb->verb_current_recurse = Fcurrent_recurse;
  5071. RRETURN(MATCH_PRUNE);
  5072. case OP_PRUNE_ARG:
  5073. Fmark = mb->nomatch_mark = Fecode + 2;
  5074. RMATCH(Fecode + PRIV(OP_lengths)[*Fecode] + Fecode[1], RM15);
  5075. if (rrc != MATCH_NOMATCH) RRETURN(rrc);
  5076. mb->verb_current_recurse = Fcurrent_recurse;
  5077. RRETURN(MATCH_PRUNE);
  5078. case OP_SKIP:
  5079. RMATCH(Fecode + PRIV(OP_lengths)[*Fecode], RM16);
  5080. if (rrc != MATCH_NOMATCH) RRETURN(rrc);
  5081. mb->verb_skip_ptr = Feptr; /* Pass back current position */
  5082. mb->verb_current_recurse = Fcurrent_recurse;
  5083. RRETURN(MATCH_SKIP);
  5084. /* Note that, for Perl compatibility, SKIP with an argument does NOT set
  5085. nomatch_mark. When a pattern match ends with a SKIP_ARG for which there was
  5086. not a matching mark, we have to re-run the match, ignoring the SKIP_ARG
  5087. that failed and any that precede it (either they also failed, or were not
  5088. triggered). To do this, we maintain a count of executed SKIP_ARGs. If a
  5089. SKIP_ARG gets to top level, the match is re-run with mb->ignore_skip_arg
  5090. set to the count of the one that failed. */
  5091. case OP_SKIP_ARG:
  5092. mb->skip_arg_count++;
  5093. if (mb->skip_arg_count <= mb->ignore_skip_arg)
  5094. {
  5095. Fecode += PRIV(OP_lengths)[*Fecode] + Fecode[1];
  5096. break;
  5097. }
  5098. RMATCH(Fecode + PRIV(OP_lengths)[*Fecode] + Fecode[1], RM17);
  5099. if (rrc != MATCH_NOMATCH) RRETURN(rrc);
  5100. /* Pass back the current skip name and return the special MATCH_SKIP_ARG
  5101. return code. This will either be caught by a matching MARK, or get to the
  5102. top, where it causes a rematch with mb->ignore_skip_arg set to the value of
  5103. mb->skip_arg_count. */
  5104. mb->verb_skip_ptr = Fecode + 2;
  5105. mb->verb_current_recurse = Fcurrent_recurse;
  5106. RRETURN(MATCH_SKIP_ARG);
  5107. /* For THEN (and THEN_ARG) we pass back the address of the opcode, so that
  5108. the branch in which it occurs can be determined. */
  5109. case OP_THEN:
  5110. RMATCH(Fecode + PRIV(OP_lengths)[*Fecode], RM18);
  5111. if (rrc != MATCH_NOMATCH) RRETURN(rrc);
  5112. mb->verb_ecode_ptr = Fecode;
  5113. mb->verb_current_recurse = Fcurrent_recurse;
  5114. RRETURN(MATCH_THEN);
  5115. case OP_THEN_ARG:
  5116. Fmark = mb->nomatch_mark = Fecode + 2;
  5117. RMATCH(Fecode + PRIV(OP_lengths)[*Fecode] + Fecode[1], RM19);
  5118. if (rrc != MATCH_NOMATCH) RRETURN(rrc);
  5119. mb->verb_ecode_ptr = Fecode;
  5120. mb->verb_current_recurse = Fcurrent_recurse;
  5121. RRETURN(MATCH_THEN);
  5122. /* ===================================================================== */
  5123. /* There's been some horrible disaster. Arrival here can only mean there is
  5124. something seriously wrong in the code above or the OP_xxx definitions. */
  5125. default:
  5126. return PCRE2_ERROR_INTERNAL;
  5127. }
  5128. /* Do not insert any code in here without much thought; it is assumed
  5129. that "continue" in the code above comes out to here to repeat the main
  5130. loop. */
  5131. } /* End of main loop */
  5132. /* Control never reaches here */
  5133. /* ========================================================================= */
  5134. /* The RRETURN() macro jumps here. The number that is saved in Freturn_id
  5135. indicates which label we actually want to return to. The value in Frdepth is
  5136. the index number of the frame in the vector. The return value has been placed
  5137. in rrc. */
  5138. #define LBL(val) case val: goto L_RM##val;
  5139. RETURN_SWITCH:
  5140. if (Frdepth == 0) return rrc; /* Exit from the top level */
  5141. F = (heapframe *)((char *)F - Fback_frame); /* Back track */
  5142. mb->cb->callout_flags |= PCRE2_CALLOUT_BACKTRACK; /* Note for callouts */
  5143. #ifdef DEBUG_SHOW_RMATCH
  5144. fprintf(stderr, "++ RETURN %d to %d\n", rrc, Freturn_id);
  5145. #endif
  5146. switch (Freturn_id)
  5147. {
  5148. LBL( 1) LBL( 2) LBL( 3) LBL( 4) LBL( 5) LBL( 6) LBL( 7) LBL( 8)
  5149. LBL( 9) LBL(10) LBL(11) LBL(12) LBL(13) LBL(14) LBL(15) LBL(16)
  5150. LBL(17) LBL(18) LBL(19) LBL(20) LBL(21) LBL(22) LBL(23) LBL(24)
  5151. LBL(25) LBL(26) LBL(27) LBL(28) LBL(29) LBL(30) LBL(31) LBL(32)
  5152. LBL(33) LBL(34) LBL(35)
  5153. #ifdef SUPPORT_WIDE_CHARS
  5154. LBL(100) LBL(101)
  5155. #endif
  5156. #ifdef SUPPORT_UNICODE
  5157. LBL(200) LBL(201) LBL(202) LBL(203) LBL(204) LBL(205) LBL(206)
  5158. LBL(207) LBL(208) LBL(209) LBL(210) LBL(211) LBL(212) LBL(213)
  5159. LBL(214) LBL(215) LBL(216) LBL(217) LBL(218) LBL(219) LBL(220)
  5160. LBL(221) LBL(222)
  5161. #endif
  5162. default:
  5163. return PCRE2_ERROR_INTERNAL;
  5164. }
  5165. #undef LBL
  5166. }
  5167. /*************************************************
  5168. * Match a Regular Expression *
  5169. *************************************************/
  5170. /* This function applies a compiled pattern to a subject string and picks out
  5171. portions of the string if it matches. Two elements in the vector are set for
  5172. each substring: the offsets to the start and end of the substring.
  5173. Arguments:
  5174. code points to the compiled expression
  5175. subject points to the subject string
  5176. length length of subject string (may contain binary zeros)
  5177. start_offset where to start in the subject string
  5178. options option bits
  5179. match_data points to a match_data block
  5180. mcontext points to a PCRE2 context
  5181. Returns: > 0 => success; value is the number of ovector pairs filled
  5182. = 0 => success, but ovector is not big enough
  5183. -1 => failed to match (PCRE2_ERROR_NOMATCH)
  5184. -2 => partial match (PCRE2_ERROR_PARTIAL)
  5185. < -2 => some kind of unexpected problem
  5186. */
  5187. PCRE2_EXP_DEFN int PCRE2_CALL_CONVENTION
  5188. pcre2_match(const pcre2_code *code, PCRE2_SPTR subject, PCRE2_SIZE length,
  5189. PCRE2_SIZE start_offset, uint32_t options, pcre2_match_data *match_data,
  5190. pcre2_match_context *mcontext)
  5191. {
  5192. int rc;
  5193. const uint8_t *start_bits = NULL;
  5194. const pcre2_real_code *re = (const pcre2_real_code *)code;
  5195. BOOL anchored;
  5196. BOOL firstline;
  5197. BOOL has_first_cu = FALSE;
  5198. BOOL has_req_cu = FALSE;
  5199. BOOL startline;
  5200. BOOL utf;
  5201. PCRE2_UCHAR first_cu = 0;
  5202. PCRE2_UCHAR first_cu2 = 0;
  5203. PCRE2_UCHAR req_cu = 0;
  5204. PCRE2_UCHAR req_cu2 = 0;
  5205. PCRE2_SPTR bumpalong_limit;
  5206. PCRE2_SPTR end_subject;
  5207. PCRE2_SPTR start_match = subject + start_offset;
  5208. PCRE2_SPTR req_cu_ptr = start_match - 1;
  5209. PCRE2_SPTR start_partial = NULL;
  5210. PCRE2_SPTR match_partial = NULL;
  5211. PCRE2_SIZE frame_size;
  5212. /* We need to have mb as a pointer to a match block, because the IS_NEWLINE
  5213. macro is used below, and it expects NLBLOCK to be defined as a pointer. */
  5214. pcre2_callout_block cb;
  5215. match_block actual_match_block;
  5216. match_block *mb = &actual_match_block;
  5217. /* Allocate an initial vector of backtracking frames on the stack. If this
  5218. proves to be too small, it is replaced by a larger one on the heap. To get a
  5219. vector of the size required that is aligned for pointers, allocate it as a
  5220. vector of pointers. */
  5221. PCRE2_SPTR stack_frames_vector[START_FRAMES_SIZE/sizeof(PCRE2_SPTR)];
  5222. mb->stack_frames = (heapframe *)stack_frames_vector;
  5223. /* A length equal to PCRE2_ZERO_TERMINATED implies a zero-terminated
  5224. subject string. */
  5225. if (length == PCRE2_ZERO_TERMINATED) length = PRIV(strlen)(subject);
  5226. end_subject = subject + length;
  5227. /* Plausibility checks */
  5228. if ((options & ~PUBLIC_MATCH_OPTIONS) != 0) return PCRE2_ERROR_BADOPTION;
  5229. if (code == NULL || subject == NULL || match_data == NULL)
  5230. return PCRE2_ERROR_NULL;
  5231. if (start_offset > length) return PCRE2_ERROR_BADOFFSET;
  5232. /* Check that the first field in the block is the magic number. */
  5233. if (re->magic_number != MAGIC_NUMBER) return PCRE2_ERROR_BADMAGIC;
  5234. /* Check the code unit width. */
  5235. if ((re->flags & PCRE2_MODE_MASK) != PCRE2_CODE_UNIT_WIDTH/8)
  5236. return PCRE2_ERROR_BADMODE;
  5237. /* PCRE2_NOTEMPTY and PCRE2_NOTEMPTY_ATSTART are match-time flags in the
  5238. options variable for this function. Users of PCRE2 who are not calling the
  5239. function directly would like to have a way of setting these flags, in the same
  5240. way that they can set pcre2_compile() flags like PCRE2_NO_AUTOPOSSESS with
  5241. constructions like (*NO_AUTOPOSSESS). To enable this, (*NOTEMPTY) and
  5242. (*NOTEMPTY_ATSTART) set bits in the pattern's "flag" function which we now
  5243. transfer to the options for this function. The bits are guaranteed to be
  5244. adjacent, but do not have the same values. This bit of Boolean trickery assumes
  5245. that the match-time bits are not more significant than the flag bits. If by
  5246. accident this is not the case, a compile-time division by zero error will
  5247. occur. */
  5248. #define FF (PCRE2_NOTEMPTY_SET|PCRE2_NE_ATST_SET)
  5249. #define OO (PCRE2_NOTEMPTY|PCRE2_NOTEMPTY_ATSTART)
  5250. options |= (re->flags & FF) / ((FF & (~FF+1)) / (OO & (~OO+1)));
  5251. #undef FF
  5252. #undef OO
  5253. /* These two settings are used in the code for checking a UTF string that
  5254. follows immediately afterwards. Other values in the mb block are used only
  5255. during interpretive processing, not when the JIT support is in use, so they are
  5256. set up later. */
  5257. utf = (re->overall_options & PCRE2_UTF) != 0;
  5258. mb->partial = ((options & PCRE2_PARTIAL_HARD) != 0)? 2 :
  5259. ((options & PCRE2_PARTIAL_SOFT) != 0)? 1 : 0;
  5260. /* Partial matching and PCRE2_ENDANCHORED are currently not allowed at the same
  5261. time. */
  5262. if (mb->partial != 0 &&
  5263. ((re->overall_options | options) & PCRE2_ENDANCHORED) != 0)
  5264. return PCRE2_ERROR_BADOPTION;
  5265. /* Check a UTF string for validity if required. For 8-bit and 16-bit strings,
  5266. we must also check that a starting offset does not point into the middle of a
  5267. multiunit character. We check only the portion of the subject that is going to
  5268. be inspected during matching - from the offset minus the maximum lookbehind
  5269. to the given length. This saves time when a small part of a large subject is
  5270. being matched by the use of a starting offset. Note that the maximum lookbehind
  5271. is a number of characters, not code units. */
  5272. #ifdef SUPPORT_UNICODE
  5273. if (utf && (options & PCRE2_NO_UTF_CHECK) == 0)
  5274. {
  5275. PCRE2_SPTR check_subject = start_match; /* start_match includes offset */
  5276. if (start_offset > 0)
  5277. {
  5278. #if PCRE2_CODE_UNIT_WIDTH != 32
  5279. unsigned int i;
  5280. if (start_match < end_subject && NOT_FIRSTCU(*start_match))
  5281. return PCRE2_ERROR_BADUTFOFFSET;
  5282. for (i = re->max_lookbehind; i > 0 && check_subject > subject; i--)
  5283. {
  5284. check_subject--;
  5285. while (check_subject > subject &&
  5286. #if PCRE2_CODE_UNIT_WIDTH == 8
  5287. (*check_subject & 0xc0) == 0x80)
  5288. #else /* 16-bit */
  5289. (*check_subject & 0xfc00) == 0xdc00)
  5290. #endif /* PCRE2_CODE_UNIT_WIDTH == 8 */
  5291. check_subject--;
  5292. }
  5293. #else
  5294. /* In the 32-bit library, one code unit equals one character. However,
  5295. we cannot just subtract the lookbehind and then compare pointers, because
  5296. a very large lookbehind could create an invalid pointer. */
  5297. if (start_offset >= re->max_lookbehind)
  5298. check_subject -= re->max_lookbehind;
  5299. else
  5300. check_subject = subject;
  5301. #endif /* PCRE2_CODE_UNIT_WIDTH != 32 */
  5302. }
  5303. /* Validate the relevant portion of the subject. After an error, adjust the
  5304. offset to be an absolute offset in the whole string. */
  5305. match_data->rc = PRIV(valid_utf)(check_subject,
  5306. length - (check_subject - subject), &(match_data->startchar));
  5307. if (match_data->rc != 0)
  5308. {
  5309. match_data->startchar += check_subject - subject;
  5310. return match_data->rc;
  5311. }
  5312. }
  5313. #endif /* SUPPORT_UNICODE */
  5314. /* It is an error to set an offset limit without setting the flag at compile
  5315. time. */
  5316. if (mcontext != NULL && mcontext->offset_limit != PCRE2_UNSET &&
  5317. (re->overall_options & PCRE2_USE_OFFSET_LIMIT) == 0)
  5318. return PCRE2_ERROR_BADOFFSETLIMIT;
  5319. /* If the pattern was successfully studied with JIT support, run the JIT
  5320. executable instead of the rest of this function. Most options must be set at
  5321. compile time for the JIT code to be usable. Fallback to the normal code path if
  5322. an unsupported option is set or if JIT returns BADOPTION (which means that the
  5323. selected normal or partial matching mode was not compiled). */
  5324. #ifdef SUPPORT_JIT
  5325. if (re->executable_jit != NULL && (options & ~PUBLIC_JIT_MATCH_OPTIONS) == 0)
  5326. {
  5327. rc = pcre2_jit_match(code, subject, length, start_offset, options,
  5328. match_data, mcontext);
  5329. if (rc != PCRE2_ERROR_JIT_BADOPTION) return rc;
  5330. }
  5331. #endif
  5332. /* Carry on with non-JIT matching. A NULL match context means "use a default
  5333. context", but we take the memory control functions from the pattern. */
  5334. if (mcontext == NULL)
  5335. {
  5336. mcontext = (pcre2_match_context *)(&PRIV(default_match_context));
  5337. mb->memctl = re->memctl;
  5338. }
  5339. else mb->memctl = mcontext->memctl;
  5340. anchored = ((re->overall_options | options) & PCRE2_ANCHORED) != 0;
  5341. firstline = (re->overall_options & PCRE2_FIRSTLINE) != 0;
  5342. startline = (re->flags & PCRE2_STARTLINE) != 0;
  5343. bumpalong_limit = (mcontext->offset_limit == PCRE2_UNSET)?
  5344. end_subject : subject + mcontext->offset_limit;
  5345. /* Initialize and set up the fixed fields in the callout block, with a pointer
  5346. in the match block. */
  5347. mb->cb = &cb;
  5348. cb.version = 2;
  5349. cb.subject = subject;
  5350. cb.subject_length = (PCRE2_SIZE)(end_subject - subject);
  5351. cb.callout_flags = 0;
  5352. /* Fill in the remaining fields in the match block. */
  5353. mb->callout = mcontext->callout;
  5354. mb->callout_data = mcontext->callout_data;
  5355. mb->start_subject = subject;
  5356. mb->start_offset = start_offset;
  5357. mb->end_subject = end_subject;
  5358. mb->hasthen = (re->flags & PCRE2_HASTHEN) != 0;
  5359. mb->moptions = options; /* Match options */
  5360. mb->poptions = re->overall_options; /* Pattern options */
  5361. mb->ignore_skip_arg = 0;
  5362. mb->mark = mb->nomatch_mark = NULL; /* In case never set */
  5363. mb->hitend = FALSE;
  5364. /* The name table is needed for finding all the numbers associated with a
  5365. given name, for condition testing. The code follows the name table. */
  5366. mb->name_table = (PCRE2_UCHAR *)((uint8_t *)re + sizeof(pcre2_real_code));
  5367. mb->name_count = re->name_count;
  5368. mb->name_entry_size = re->name_entry_size;
  5369. mb->start_code = mb->name_table + re->name_count * re->name_entry_size;
  5370. /* Process the \R and newline settings. */
  5371. mb->bsr_convention = re->bsr_convention;
  5372. mb->nltype = NLTYPE_FIXED;
  5373. switch(re->newline_convention)
  5374. {
  5375. case PCRE2_NEWLINE_CR:
  5376. mb->nllen = 1;
  5377. mb->nl[0] = CHAR_CR;
  5378. break;
  5379. case PCRE2_NEWLINE_LF:
  5380. mb->nllen = 1;
  5381. mb->nl[0] = CHAR_NL;
  5382. break;
  5383. case PCRE2_NEWLINE_NUL:
  5384. mb->nllen = 1;
  5385. mb->nl[0] = CHAR_NUL;
  5386. break;
  5387. case PCRE2_NEWLINE_CRLF:
  5388. mb->nllen = 2;
  5389. mb->nl[0] = CHAR_CR;
  5390. mb->nl[1] = CHAR_NL;
  5391. break;
  5392. case PCRE2_NEWLINE_ANY:
  5393. mb->nltype = NLTYPE_ANY;
  5394. break;
  5395. case PCRE2_NEWLINE_ANYCRLF:
  5396. mb->nltype = NLTYPE_ANYCRLF;
  5397. break;
  5398. default: return PCRE2_ERROR_INTERNAL;
  5399. }
  5400. /* The backtracking frames have fixed data at the front, and a PCRE2_SIZE
  5401. vector at the end, whose size depends on the number of capturing parentheses in
  5402. the pattern. It is not used at all if there are no capturing parentheses.
  5403. frame_size is the total size of each frame
  5404. mb->frame_vector_size is the total usable size of the vector (rounded down
  5405. to a whole number of frames)
  5406. The last of these is changed within the match() function if the frame vector
  5407. has to be expanded. We therefore put it into the match block so that it is
  5408. correct when calling match() more than once for non-anchored patterns. */
  5409. frame_size = offsetof(heapframe, ovector) +
  5410. re->top_bracket * 2 * sizeof(PCRE2_SIZE);
  5411. /* Limits set in the pattern override the match context only if they are
  5412. smaller. */
  5413. mb->heap_limit = (mcontext->heap_limit < re->limit_heap)?
  5414. mcontext->heap_limit : re->limit_heap;
  5415. mb->match_limit = (mcontext->match_limit < re->limit_match)?
  5416. mcontext->match_limit : re->limit_match;
  5417. mb->match_limit_depth = (mcontext->depth_limit < re->limit_depth)?
  5418. mcontext->depth_limit : re->limit_depth;
  5419. /* If a pattern has very many capturing parentheses, the frame size may be very
  5420. large. Ensure that there are at least 10 available frames by getting an initial
  5421. vector on the heap if necessary, except when the heap limit prevents this. Get
  5422. fewer if possible. (The heap limit is in kilobytes.) */
  5423. if (frame_size <= START_FRAMES_SIZE/10)
  5424. {
  5425. mb->match_frames = mb->stack_frames; /* Initial frame vector on the stack */
  5426. mb->frame_vector_size = ((START_FRAMES_SIZE/frame_size) * frame_size);
  5427. }
  5428. else
  5429. {
  5430. mb->frame_vector_size = frame_size * 10;
  5431. if ((mb->frame_vector_size / 1024) > mb->heap_limit)
  5432. {
  5433. if (frame_size > mb->heap_limit * 1024) return PCRE2_ERROR_HEAPLIMIT;
  5434. mb->frame_vector_size = ((mb->heap_limit * 1024)/frame_size) * frame_size;
  5435. }
  5436. mb->match_frames = mb->memctl.malloc(mb->frame_vector_size,
  5437. mb->memctl.memory_data);
  5438. if (mb->match_frames == NULL) return PCRE2_ERROR_NOMEMORY;
  5439. }
  5440. mb->match_frames_top =
  5441. (heapframe *)((char *)mb->match_frames + mb->frame_vector_size);
  5442. /* Write to the ovector within the first frame to mark every capture unset and
  5443. to avoid uninitialized memory read errors when it is copied to a new frame. */
  5444. memset((char *)(mb->match_frames) + offsetof(heapframe, ovector), 0xff,
  5445. re->top_bracket * 2 * sizeof(PCRE2_SIZE));
  5446. /* Pointers to the individual character tables */
  5447. mb->lcc = re->tables + lcc_offset;
  5448. mb->fcc = re->tables + fcc_offset;
  5449. mb->ctypes = re->tables + ctypes_offset;
  5450. /* Set up the first code unit to match, if available. If there's no first code
  5451. unit there may be a bitmap of possible first characters. */
  5452. if ((re->flags & PCRE2_FIRSTSET) != 0)
  5453. {
  5454. has_first_cu = TRUE;
  5455. first_cu = first_cu2 = (PCRE2_UCHAR)(re->first_codeunit);
  5456. if ((re->flags & PCRE2_FIRSTCASELESS) != 0)
  5457. {
  5458. first_cu2 = TABLE_GET(first_cu, mb->fcc, first_cu);
  5459. #if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 8
  5460. if (utf && first_cu > 127) first_cu2 = UCD_OTHERCASE(first_cu);
  5461. #endif
  5462. }
  5463. }
  5464. else
  5465. if (!startline && (re->flags & PCRE2_FIRSTMAPSET) != 0)
  5466. start_bits = re->start_bitmap;
  5467. /* There may also be a "last known required character" set. */
  5468. if ((re->flags & PCRE2_LASTSET) != 0)
  5469. {
  5470. has_req_cu = TRUE;
  5471. req_cu = req_cu2 = (PCRE2_UCHAR)(re->last_codeunit);
  5472. if ((re->flags & PCRE2_LASTCASELESS) != 0)
  5473. {
  5474. req_cu2 = TABLE_GET(req_cu, mb->fcc, req_cu);
  5475. #if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 8
  5476. if (utf && req_cu > 127) req_cu2 = UCD_OTHERCASE(req_cu);
  5477. #endif
  5478. }
  5479. }
  5480. /* ==========================================================================*/
  5481. /* Loop for handling unanchored repeated matching attempts; for anchored regexs
  5482. the loop runs just once. */
  5483. for(;;)
  5484. {
  5485. PCRE2_SPTR new_start_match;
  5486. /* ----------------- Start of match optimizations ---------------- */
  5487. /* There are some optimizations that avoid running the match if a known
  5488. starting point is not found, or if a known later code unit is not present.
  5489. However, there is an option (settable at compile time) that disables these,
  5490. for testing and for ensuring that all callouts do actually occur. */
  5491. if ((re->overall_options & PCRE2_NO_START_OPTIMIZE) == 0)
  5492. {
  5493. /* If firstline is TRUE, the start of the match is constrained to the first
  5494. line of a multiline string. That is, the match must be before or at the
  5495. first newline following the start of matching. Temporarily adjust
  5496. end_subject so that we stop the scans for a first code unit at a newline.
  5497. If the match fails at the newline, later code breaks the loop. */
  5498. if (firstline)
  5499. {
  5500. PCRE2_SPTR t = start_match;
  5501. #ifdef SUPPORT_UNICODE
  5502. if (utf)
  5503. {
  5504. while (t < end_subject && !IS_NEWLINE(t))
  5505. {
  5506. t++;
  5507. ACROSSCHAR(t < end_subject, t, t++);
  5508. }
  5509. }
  5510. else
  5511. #endif
  5512. while (t < end_subject && !IS_NEWLINE(t)) t++;
  5513. end_subject = t;
  5514. }
  5515. /* Anchored: check the first code unit if one is recorded. This may seem
  5516. pointless but it can help in detecting a no match case without scanning for
  5517. the required code unit. */
  5518. if (anchored)
  5519. {
  5520. if (has_first_cu || start_bits != NULL)
  5521. {
  5522. BOOL ok = start_match < end_subject;
  5523. if (ok)
  5524. {
  5525. PCRE2_UCHAR c = UCHAR21TEST(start_match);
  5526. ok = has_first_cu && (c == first_cu || c == first_cu2);
  5527. if (!ok && start_bits != NULL)
  5528. {
  5529. #if PCRE2_CODE_UNIT_WIDTH != 8
  5530. if (c > 255) c = 255;
  5531. #endif
  5532. ok = (start_bits[c/8] & (1 << (c&7))) != 0;
  5533. }
  5534. }
  5535. if (!ok)
  5536. {
  5537. rc = MATCH_NOMATCH;
  5538. break;
  5539. }
  5540. }
  5541. }
  5542. /* Not anchored. Advance to a unique first code unit if there is one. In
  5543. 8-bit mode, the use of memchr() gives a big speed up, even though we have
  5544. to call it twice in caseless mode, in order to find the earliest occurrence
  5545. of the character in either of its cases. */
  5546. else
  5547. {
  5548. if (has_first_cu)
  5549. {
  5550. if (first_cu != first_cu2) /* Caseless */
  5551. {
  5552. #if PCRE2_CODE_UNIT_WIDTH != 8
  5553. PCRE2_UCHAR smc;
  5554. while (start_match < end_subject &&
  5555. (smc = UCHAR21TEST(start_match)) != first_cu &&
  5556. smc != first_cu2)
  5557. start_match++;
  5558. #else /* 8-bit code units */
  5559. PCRE2_SPTR pp1 =
  5560. memchr(start_match, first_cu, end_subject-start_match);
  5561. PCRE2_SPTR pp2 =
  5562. memchr(start_match, first_cu2, end_subject-start_match);
  5563. if (pp1 == NULL)
  5564. start_match = (pp2 == NULL)? end_subject : pp2;
  5565. else
  5566. start_match = (pp2 == NULL || pp1 < pp2)? pp1 : pp2;
  5567. #endif
  5568. }
  5569. /* The caseful case */
  5570. else
  5571. {
  5572. #if PCRE2_CODE_UNIT_WIDTH != 8
  5573. while (start_match < end_subject && UCHAR21TEST(start_match) !=
  5574. first_cu)
  5575. start_match++;
  5576. #else
  5577. start_match = memchr(start_match, first_cu, end_subject - start_match);
  5578. if (start_match == NULL) start_match = end_subject;
  5579. #endif
  5580. }
  5581. /* If we can't find the required code unit, having reached the true end
  5582. of the subject, break the bumpalong loop, to force a match failure,
  5583. except when doing partial matching, when we let the next cycle run at
  5584. the end of the subject. To see why, consider the pattern /(?<=abc)def/,
  5585. which partially matches "abc", even though the string does not contain
  5586. the starting character "d". If we have not reached the true end of the
  5587. subject (PCRE2_FIRSTLINE caused end_subject to be temporarily modified)
  5588. we also let the cycle run, because the matching string is legitimately
  5589. allowed to start with the first code unit of a newline. */
  5590. if (!mb->partial && start_match >= mb->end_subject)
  5591. {
  5592. rc = MATCH_NOMATCH;
  5593. break;
  5594. }
  5595. }
  5596. /* If there's no first code unit, advance to just after a linebreak for a
  5597. multiline match if required. */
  5598. else if (startline)
  5599. {
  5600. if (start_match > mb->start_subject + start_offset)
  5601. {
  5602. #ifdef SUPPORT_UNICODE
  5603. if (utf)
  5604. {
  5605. while (start_match < end_subject && !WAS_NEWLINE(start_match))
  5606. {
  5607. start_match++;
  5608. ACROSSCHAR(start_match < end_subject, start_match, start_match++);
  5609. }
  5610. }
  5611. else
  5612. #endif
  5613. while (start_match < end_subject && !WAS_NEWLINE(start_match))
  5614. start_match++;
  5615. /* If we have just passed a CR and the newline option is ANY or
  5616. ANYCRLF, and we are now at a LF, advance the match position by one
  5617. more code unit. */
  5618. if (start_match[-1] == CHAR_CR &&
  5619. (mb->nltype == NLTYPE_ANY || mb->nltype == NLTYPE_ANYCRLF) &&
  5620. start_match < end_subject &&
  5621. UCHAR21TEST(start_match) == CHAR_NL)
  5622. start_match++;
  5623. }
  5624. }
  5625. /* If there's no first code unit or a requirement for a multiline line
  5626. start, advance to a non-unique first code unit if any have been
  5627. identified. The bitmap contains only 256 bits. When code units are 16 or
  5628. 32 bits wide, all code units greater than 254 set the 255 bit. */
  5629. else if (start_bits != NULL)
  5630. {
  5631. while (start_match < end_subject)
  5632. {
  5633. uint32_t c = UCHAR21TEST(start_match);
  5634. #if PCRE2_CODE_UNIT_WIDTH != 8
  5635. if (c > 255) c = 255;
  5636. #endif
  5637. if ((start_bits[c/8] & (1 << (c&7))) != 0) break;
  5638. start_match++;
  5639. }
  5640. /* See comment above in first_cu checking about the next few lines. */
  5641. if (!mb->partial && start_match >= mb->end_subject)
  5642. {
  5643. rc = MATCH_NOMATCH;
  5644. break;
  5645. }
  5646. }
  5647. } /* End first code unit handling */
  5648. /* Restore fudged end_subject */
  5649. end_subject = mb->end_subject;
  5650. /* The following two optimizations must be disabled for partial matching. */
  5651. if (!mb->partial)
  5652. {
  5653. /* The minimum matching length is a lower bound; no string of that length
  5654. may actually match the pattern. Although the value is, strictly, in
  5655. characters, we treat it as code units to avoid spending too much time in
  5656. this optimization. */
  5657. if (end_subject - start_match < re->minlength)
  5658. {
  5659. rc = MATCH_NOMATCH;
  5660. break;
  5661. }
  5662. /* If req_cu is set, we know that that code unit must appear in the
  5663. subject for the (non-partial) match to succeed. If the first code unit is
  5664. set, req_cu must be later in the subject; otherwise the test starts at
  5665. the match point. This optimization can save a huge amount of backtracking
  5666. in patterns with nested unlimited repeats that aren't going to match.
  5667. Writing separate code for caseful/caseless versions makes it go faster,
  5668. as does using an autoincrement and backing off on a match. As in the case
  5669. of the first code unit, using memchr() in the 8-bit library gives a big
  5670. speed up. Unlike the first_cu check above, we do not need to call
  5671. memchr() twice in the caseless case because we only need to check for the
  5672. presence of the character in either case, not find the first occurrence.
  5673. HOWEVER: when the subject string is very, very long, searching to its end
  5674. can take a long time, and give bad performance on quite ordinary
  5675. patterns. This showed up when somebody was matching something like
  5676. /^\d+C/ on a 32-megabyte string... so we don't do this when the string is
  5677. sufficiently long. */
  5678. if (has_req_cu && end_subject - start_match < REQ_CU_MAX)
  5679. {
  5680. PCRE2_SPTR p = start_match + (has_first_cu? 1:0);
  5681. /* We don't need to repeat the search if we haven't yet reached the
  5682. place we found it last time round the bumpalong loop. */
  5683. if (p > req_cu_ptr)
  5684. {
  5685. if (p < end_subject)
  5686. {
  5687. if (req_cu != req_cu2) /* Caseless */
  5688. {
  5689. #if PCRE2_CODE_UNIT_WIDTH != 8
  5690. do
  5691. {
  5692. uint32_t pp = UCHAR21INCTEST(p);
  5693. if (pp == req_cu || pp == req_cu2) { p--; break; }
  5694. }
  5695. while (p < end_subject);
  5696. #else /* 8-bit code units */
  5697. PCRE2_SPTR pp = p;
  5698. p = memchr(pp, req_cu, end_subject - pp);
  5699. if (p == NULL)
  5700. {
  5701. p = memchr(pp, req_cu2, end_subject - pp);
  5702. if (p == NULL) p = end_subject;
  5703. }
  5704. #endif /* PCRE2_CODE_UNIT_WIDTH != 8 */
  5705. }
  5706. /* The caseful case */
  5707. else
  5708. {
  5709. #if PCRE2_CODE_UNIT_WIDTH != 8
  5710. do
  5711. {
  5712. if (UCHAR21INCTEST(p) == req_cu) { p--; break; }
  5713. }
  5714. while (p < end_subject);
  5715. #else /* 8-bit code units */
  5716. p = memchr(p, req_cu, end_subject - p);
  5717. if (p == NULL) p = end_subject;
  5718. #endif
  5719. }
  5720. }
  5721. /* If we can't find the required code unit, break the bumpalong loop,
  5722. forcing a match failure. */
  5723. if (p >= end_subject)
  5724. {
  5725. rc = MATCH_NOMATCH;
  5726. break;
  5727. }
  5728. /* If we have found the required code unit, save the point where we
  5729. found it, so that we don't search again next time round the bumpalong
  5730. loop if the start hasn't yet passed this code unit. */
  5731. req_cu_ptr = p;
  5732. }
  5733. }
  5734. }
  5735. }
  5736. /* ------------ End of start of match optimizations ------------ */
  5737. /* Give no match if we have passed the bumpalong limit. */
  5738. if (start_match > bumpalong_limit)
  5739. {
  5740. rc = MATCH_NOMATCH;
  5741. break;
  5742. }
  5743. /* OK, we can now run the match. If "hitend" is set afterwards, remember the
  5744. first starting point for which a partial match was found. */
  5745. cb.start_match = (PCRE2_SIZE)(start_match - subject);
  5746. cb.callout_flags |= PCRE2_CALLOUT_STARTMATCH;
  5747. mb->start_used_ptr = start_match;
  5748. mb->last_used_ptr = start_match;
  5749. mb->match_call_count = 0;
  5750. mb->end_offset_top = 0;
  5751. mb->skip_arg_count = 0;
  5752. rc = match(start_match, mb->start_code, match_data->ovector,
  5753. match_data->oveccount, re->top_bracket, frame_size, mb);
  5754. if (mb->hitend && start_partial == NULL)
  5755. {
  5756. start_partial = mb->start_used_ptr;
  5757. match_partial = start_match;
  5758. }
  5759. switch(rc)
  5760. {
  5761. /* If MATCH_SKIP_ARG reaches this level it means that a MARK that matched
  5762. the SKIP's arg was not found. In this circumstance, Perl ignores the SKIP
  5763. entirely. The only way we can do that is to re-do the match at the same
  5764. point, with a flag to force SKIP with an argument to be ignored. Just
  5765. treating this case as NOMATCH does not work because it does not check other
  5766. alternatives in patterns such as A(*SKIP:A)B|AC when the subject is AC. */
  5767. case MATCH_SKIP_ARG:
  5768. new_start_match = start_match;
  5769. mb->ignore_skip_arg = mb->skip_arg_count;
  5770. break;
  5771. /* SKIP passes back the next starting point explicitly, but if it is no
  5772. greater than the match we have just done, treat it as NOMATCH. */
  5773. case MATCH_SKIP:
  5774. if (mb->verb_skip_ptr > start_match)
  5775. {
  5776. new_start_match = mb->verb_skip_ptr;
  5777. break;
  5778. }
  5779. /* Fall through */
  5780. /* NOMATCH and PRUNE advance by one character. THEN at this level acts
  5781. exactly like PRUNE. Unset ignore SKIP-with-argument. */
  5782. case MATCH_NOMATCH:
  5783. case MATCH_PRUNE:
  5784. case MATCH_THEN:
  5785. mb->ignore_skip_arg = 0;
  5786. new_start_match = start_match + 1;
  5787. #ifdef SUPPORT_UNICODE
  5788. if (utf)
  5789. ACROSSCHAR(new_start_match < end_subject, new_start_match,
  5790. new_start_match++);
  5791. #endif
  5792. break;
  5793. /* COMMIT disables the bumpalong, but otherwise behaves as NOMATCH. */
  5794. case MATCH_COMMIT:
  5795. rc = MATCH_NOMATCH;
  5796. goto ENDLOOP;
  5797. /* Any other return is either a match, or some kind of error. */
  5798. default:
  5799. goto ENDLOOP;
  5800. }
  5801. /* Control reaches here for the various types of "no match at this point"
  5802. result. Reset the code to MATCH_NOMATCH for subsequent checking. */
  5803. rc = MATCH_NOMATCH;
  5804. /* If PCRE2_FIRSTLINE is set, the match must happen before or at the first
  5805. newline in the subject (though it may continue over the newline). Therefore,
  5806. if we have just failed to match, starting at a newline, do not continue. */
  5807. if (firstline && IS_NEWLINE(start_match)) break;
  5808. /* Advance to new matching position */
  5809. start_match = new_start_match;
  5810. /* Break the loop if the pattern is anchored or if we have passed the end of
  5811. the subject. */
  5812. if (anchored || start_match > end_subject) break;
  5813. /* If we have just passed a CR and we are now at a LF, and the pattern does
  5814. not contain any explicit matches for \r or \n, and the newline option is CRLF
  5815. or ANY or ANYCRLF, advance the match position by one more code unit. In
  5816. normal matching start_match will always be greater than the first position at
  5817. this stage, but a failed *SKIP can cause a return at the same point, which is
  5818. why the first test exists. */
  5819. if (start_match > subject + start_offset &&
  5820. start_match[-1] == CHAR_CR &&
  5821. start_match < end_subject &&
  5822. *start_match == CHAR_NL &&
  5823. (re->flags & PCRE2_HASCRORLF) == 0 &&
  5824. (mb->nltype == NLTYPE_ANY ||
  5825. mb->nltype == NLTYPE_ANYCRLF ||
  5826. mb->nllen == 2))
  5827. start_match++;
  5828. mb->mark = NULL; /* Reset for start of next match attempt */
  5829. } /* End of for(;;) "bumpalong" loop */
  5830. /* ==========================================================================*/
  5831. /* When we reach here, one of the following stopping conditions is true:
  5832. (1) The match succeeded, either completely, or partially;
  5833. (2) The pattern is anchored or the match was failed after (*COMMIT);
  5834. (3) We are past the end of the subject or the bumpalong limit;
  5835. (4) PCRE2_FIRSTLINE is set and we have failed to match at a newline, because
  5836. this option requests that a match occur at or before the first newline in
  5837. the subject.
  5838. (5) Some kind of error occurred.
  5839. */
  5840. ENDLOOP:
  5841. /* Release an enlarged frame vector that is on the heap. */
  5842. if (mb->match_frames != mb->stack_frames)
  5843. mb->memctl.free(mb->match_frames, mb->memctl.memory_data);
  5844. /* Fill in fields that are always returned in the match data. */
  5845. match_data->code = re;
  5846. match_data->subject = subject;
  5847. match_data->mark = mb->mark;
  5848. match_data->matchedby = PCRE2_MATCHEDBY_INTERPRETER;
  5849. /* Handle a fully successful match. Set the return code to the number of
  5850. captured strings, or 0 if there were too many to fit into the ovector, and then
  5851. set the remaining returned values before returning. */
  5852. if (rc == MATCH_MATCH)
  5853. {
  5854. match_data->rc = ((int)mb->end_offset_top >= 2 * match_data->oveccount)?
  5855. 0 : (int)mb->end_offset_top/2 + 1;
  5856. match_data->startchar = start_match - subject;
  5857. match_data->leftchar = mb->start_used_ptr - subject;
  5858. match_data->rightchar = ((mb->last_used_ptr > mb->end_match_ptr)?
  5859. mb->last_used_ptr : mb->end_match_ptr) - subject;
  5860. return match_data->rc;
  5861. }
  5862. /* Control gets here if there has been a partial match, an error, or if the
  5863. overall match attempt has failed at all permitted starting positions. Any mark
  5864. data is in the nomatch_mark field. */
  5865. match_data->mark = mb->nomatch_mark;
  5866. /* For anything other than nomatch or partial match, just return the code. */
  5867. if (rc != MATCH_NOMATCH && rc != PCRE2_ERROR_PARTIAL) match_data->rc = rc;
  5868. /* Handle a partial match. */
  5869. else if (match_partial != NULL)
  5870. {
  5871. match_data->ovector[0] = match_partial - subject;
  5872. match_data->ovector[1] = end_subject - subject;
  5873. match_data->startchar = match_partial - subject;
  5874. match_data->leftchar = start_partial - subject;
  5875. match_data->rightchar = end_subject - subject;
  5876. match_data->rc = PCRE2_ERROR_PARTIAL;
  5877. }
  5878. /* Else this is the classic nomatch case. */
  5879. else match_data->rc = PCRE2_ERROR_NOMATCH;
  5880. return match_data->rc;
  5881. }
  5882. /* End of pcre2_match.c */