Changeset 7693 for trunk/MagicSoft/Mars/mranforest
- Timestamp: 05/05/06 07:46:50
- Location: trunk/MagicSoft/Mars/mranforest
- Files: 4 edited
Legend:
- Unmodified: no prefix (context)
- Added: prefixed with "+"
- Removed: prefixed with "-"
trunk/MagicSoft/Mars/mranforest/MRanForest.cc
(r7685 → r7693)

 #include "MParList.h"
 
+#include "MArrayI.h"
+#include "MArrayF.h"
+
 #include "MLog.h"
 #include "MLogManip.h"
…
     fHadTrue.Set(numdata);
-    fHadTrue.Reset(0);
+    fHadTrue.Reset();
 
     for (Int_t j=0;j<numdata;j++)
…
     // setup labels for classification/regression
     fClass.Set(numdata);
-    fClass.Reset(0);
+    fClass.Reset();
 
     if (fClassify)
…
     // allocating and initializing arrays
     fHadEst.Set(numdata);
-    fHadEst.Reset(0);
+    fHadEst.Reset();
 
     fNTimesOutBag.Set(numdata);
-    fNTimesOutBag.Reset(0);
+    fNTimesOutBag.Reset();
 
     fDataSort.Set(dim*numdata);
-    fDataSort.Reset(0);
+    fDataSort.Reset();
 
     fDataRang.Set(dim*numdata);
-    fDataRang.Reset(0);
+    fDataRang.Reset();
 
     Bool_t useweights = fWeight.GetSize()==numdata;
…
     if(calcResolution)
-        *fLog << " no. of tree   no. of nodes   resolution in % (from oob-data -> overest. of error)" << endl;
+        *fLog << "TreeNum BagSize NumNodes TestSize res/% (from oob-data -> overest. of error)" << endl;
     else
-        *fLog << " no. of tree   no. of nodes   rms in % (from oob-data -> overest. of error)" << endl;
+        *fLog << "TreeNum BagSize NumNodes TestSize rms/% (from oob-data -> overest. of error)" << endl;
     // 12345678901234567890123456789012345678901234567890
 }
…
     // bootstrap aggregating (bagging) -> sampling with replacement:
 
-    TArrayF classpopw(nclass);
-    TArrayI jinbag(numdata); // Initialization includes filling with 0
-    TArrayF winbag(numdata); // Initialization includes filling with 0
+    MArrayF classpopw(nclass);
+    MArrayI jinbag(numdata); // Initialization includes filling with 0
+    MArrayF winbag(numdata); // Initialization includes filling with 0
 
     float square=0;
…
         // all events in the training sample
 
-        const Int_t k = Int_t(gRandom->Rndm()*numdata);
+        const Int_t k = gRandom->Integer(numdata);
 
         if(fClassify)
…
         mean  +=fHadTrue[k]*fWeight[k];
         square+=fHadTrue[k]*fHadTrue[k]*fWeight[k];
 
-        winbag[k]+=fWeight[k];
+        winbag[k]+=fWeight[k]; // Increase weight if chosen more than once
         jinbag[k]=1;
-
     }
…
     // In bagging procedure ca. 2/3 of all elements in the original
     // training sample are used to build the in-bag data
-    TArrayI datsortinbag=fDataSort;
-    Int_t ninbag=0;
-
-    ModifyDataSort(datsortinbag, ninbag, jinbag);
-
-    fRanTree->GrowTree(fMatrix,fHadTrue,fClass,datsortinbag,fDataRang,classpopw,mean,square,
+    const MArrayF hadtrue(fHadTrue.GetSize(),   fHadTrue.GetArray());
+    const MArrayI fclass(fClass.GetSize(),      fClass.GetArray());
+    const MArrayI datarang(fDataRang.GetSize(), fDataRang.GetArray());
+
+    MArrayI datsortinbag(fDataSort.GetSize(), fDataSort.GetArray());
+
+    ModifyDataSort(datsortinbag, jinbag);
+
+    fRanTree->GrowTree(fMatrix,hadtrue,fclass,datsortinbag,datarang,classpopw,mean,square,
                        jinbag,winbag,nclass);
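The bagging loop above draws numdata events with replacement (gRandom->Integer(numdata)): events drawn several times accumulate weight in winbag, while events never drawn keep jinbag[k]==0 and stay out-of-bag for the error estimate, which is why roughly 2/3 of the sample ends up in the bag. A minimal stand-alone sketch of that sampling step, using std::mt19937 in place of ROOT's gRandom (all names besides numdata, winbag, jinbag and ninbag are illustrative):

    #include <cstdio>
    #include <random>
    #include <vector>

    int main()
    {
        const int numdata = 1000;                 // size of the training sample
        std::mt19937 rng(4357);
        std::uniform_int_distribution<int> pick(0, numdata-1);

        std::vector<double> winbag(numdata, 0.);  // summed weight of each event in the bag
        std::vector<int>    jinbag(numdata, 0);   // 1 if the event was drawn at least once

        // sampling with replacement: numdata draws from numdata events
        for (int i = 0; i < numdata; i++)
        {
            const int k = pick(rng);
            winbag[k] += 1.;                      // weight grows if chosen more than once
            jinbag[k]  = 1;
        }

        int ninbag = 0;
        for (int n = 0; n < numdata; n++)
            if (jinbag[n] == 1) ninbag++;

        // expectation: about 1-1/e, i.e. roughly 63%, of the events end up in the bag
        std::printf("in-bag events: %d of %d (%.1f%%)\n",
                    ninbag, numdata, 100.*ninbag/numdata);
        return 0;
    }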
…
     // determined from oob-data is underestimated, but can still be taken as upper limit.
 
+    Int_t ninbag = 0;
     for (Int_t ievt=0;ievt<numdata;ievt++)
     {
         if (jinbag[ievt]>0)
+        {
+            ninbag++;
             continue;
+        }
 
         fHadEst[ievt]      +=fRanTree->TreeHad((*fMatrix), ievt);
         fNTimesOutBag[ievt]++;
-
     }
…
     //-------------------------------------------------------------------
     // give running output
-    *fLog << setw(5) << fTreeNo;
-    *fLog << setw(18) << fRanTree->GetNumEndNodes();
+    *fLog << setw(4) << fTreeNo;
+    *fLog << Form(" %8.1f", 100.*ninbag/numdata);
+    *fLog << setw(9) << fRanTree->GetNumEndNodes();
+    *fLog << Form(" %9.1f", 100.*n/numdata);
     *fLog << Form("%18.2f", ferr*100.);
     *fLog << endl;
…
     {
         v[n]=(*fMatrix)(n,mvar);
-        isort[n]=n;
+        //isort[n]=n;
 
         if(TMath::IsNaN(v[n]))
…
 }
 
-void MRanForest::ModifyDataSort(TArrayI &datsortinbag, Int_t ninbag, const TArrayI &jinbag)
+// Removes all indices which are not in the bag from datsortinbag
+void MRanForest::ModifyDataSort(MArrayI &datsortinbag, const MArrayI &jinbag)
 {
     const Int_t numdim =GetNumDim();
     const Int_t numdata=GetNumData();
 
-    ninbag=0;
+    Int_t ninbag=0;
     for (Int_t n=0;n<numdata;n++)
         if(jinbag[n]==1) ninbag++;
 
     for(Int_t m=0;m<numdim;m++)
     {
+        Int_t *subsort = &datsortinbag[m*numdata];
+
         Int_t k=0;
-        Int_t nt=0;
-        for(Int_t n=0;n<numdata;n++)
+        for(Int_t n=0;n<ninbag;n++)
         {
-            if(jinbag[datsortinbag[m*numdata+k]]==1)
+            if(jinbag[subsort[k]]==1)
             {
-                datsortinbag[m*numdata+nt]=datsortinbag[m*numdata+k];
+                subsort[n] = subsort[k];
                 k++;
             }else{
-                for(Int_t j=1;j<numdata-k;j++)
+                for(Int_t j=k+1;j<numdata;j++)
                 {
-                    if(jinbag[datsortinbag[m*numdata+k+j]]==1)
+                    if(jinbag[subsort[j]]==1)
                     {
-                        datsortinbag[m*numdata+nt]=datsortinbag[m*numdata+k+j];
-                        k+=j+1;
+                        subsort[n] = subsort[j];
+                        k = j+1;
                         break;
                     }
                 }
             }
-            nt++;
-            if(nt>=ninbag) break;
         }
     }
 }
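The ninbag counter and the skip of in-bag events added above implement the out-of-bag (oob) bookkeeping: only events that were not used to grow the current tree contribute to fHadEst and fNTimesOutBag, so the averaged estimate slightly overestimates the true error, as the comment in the code notes. A small stand-alone sketch of that bookkeeping, assuming a dummy treeOutput() in place of MRanTree::TreeHad:

    #include <cstdio>
    #include <vector>

    // Dummy per-event tree response, standing in for MRanTree::TreeHad.
    double treeOutput(int /*ievt*/) { return 0.5; }

    int main()
    {
        const int numdata = 5;
        std::vector<int>    jinbag = {1, 0, 1, 1, 0};   // bag flags of the current tree
        std::vector<double> hadEst(numdata, 0.);        // summed oob responses
        std::vector<int>    nTimesOutBag(numdata, 0);   // number of trees with this event oob

        for (int ievt = 0; ievt < numdata; ievt++)
        {
            if (jinbag[ievt] > 0)
                continue;                               // event was used for training -> skip

            hadEst[ievt]       += treeOutput(ievt);
            nTimesOutBag[ievt] += 1;
        }

        // after many trees the oob estimate of event i is hadEst[i]/nTimesOutBag[i]
        for (int i = 0; i < numdata; i++)
            if (nTimesOutBag[i] > 0)
                std::printf("event %d: oob estimate %.2f\n", i, hadEst[i]/nTimesOutBag[i]);
        return 0;
    }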
trunk/MagicSoft/Mars/mranforest/MRanForest.h
(r7535 → r7693)

 class TVector;
 class TObjArray;
+
+class MArrayI;
+class MArrayF;
 
 class MRanTree;
…
     // create and modify (->due to bagging) fDataSort
     Bool_t CreateDataSort();
-    void ModifyDataSort(TArrayI &datsortinbag, Int_t ninbag, const TArrayI &jinbag);
+    void ModifyDataSort(MArrayI &datsortinbag, const MArrayI &jinbag);
 
 public:
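The dropped Int_t ninbag argument was passed by value, so anything ModifyDataSort computed into it could never reach the caller; the new signature simply omits it and the count is determined locally where it is needed. A minimal illustration of the by-value versus by-reference distinction (countByValue and countByRef are made-up names):

    #include <cstdio>

    // Passing by value: the caller's variable is untouched.
    void countByValue(int n) { n = 42; }
    // Passing by reference: the caller sees the result.
    void countByRef(int &n)  { n = 42; }

    int main()
    {
        int a = 0, b = 0;
        countByValue(a);
        countByRef(b);
        std::printf("by value: %d, by reference: %d\n", a, b);  // prints 0 and 42
        return 0;
    }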
trunk/MagicSoft/Mars/mranforest/MRanTree.cc
(r7420 → r7693)

 #include <TRandom.h>
 
+#include "MArrayI.h"
+#include "MArrayF.h"
+
 #include "MLog.h"
 #include "MLogManip.h"
…
 }
 
-void MRanTree::GrowTree(TMatrix *mat, const TArrayF &hadtrue, const TArrayI &idclass,
-                        TArrayI &datasort, const TArrayI &datarang, TArrayF &tclasspop,
-                        float &mean, float &square, TArrayI &jinbag, const TArrayF &winbag,
+void MRanTree::GrowTree(TMatrix *mat, const MArrayF &hadtrue, const MArrayI &idclass,
+                        MArrayI &datasort, const MArrayI &datarang, MArrayF &tclasspop,
+                        const Float_t &mean, const Float_t &square, const MArrayI &jinbag, const MArrayF &winbag,
                         const int nclass)
 {
…
     for (Int_t n=0;n<numdata;n++) if(jinbag[n]==1) ninbag++;
 
-    TArrayI bestsplit(nrnodes);     bestsplit.Reset(0);
-    TArrayI bestsplitnext(nrnodes); bestsplitnext.Reset(0);
-
-    fBestVar.Set(nrnodes);   fBestVar.Reset(0);
-    fTreeMap1.Set(nrnodes);  fTreeMap1.Reset(0);
-    fTreeMap2.Set(nrnodes);  fTreeMap2.Reset(0);
-    fBestSplit.Set(nrnodes); fBestSplit.Reset(0);
-    fGiniDec.Set(numdim);    fGiniDec.Reset(0);
+    MArrayI bestsplit(nrnodes);
+    MArrayI bestsplitnext(nrnodes);
+
+    fBestVar.Set(nrnodes);   fBestVar.Reset();
+    fTreeMap1.Set(nrnodes);  fTreeMap1.Reset();
+    fTreeMap2.Set(nrnodes);  fTreeMap2.Reset();
+    fBestSplit.Set(nrnodes); fBestSplit.Reset();
+    fGiniDec.Set(numdim);    fGiniDec.Reset();
…
         const Int_t &msp  = fBestVar[k];
 
-        fBestSplit[k]  = (*mat)(bsp, msp);
-        fBestSplit[k] += (*mat)(bspn,msp);
-        fBestSplit[k] /= 2.;
+        fBestSplit[k] = ((*mat)(bsp, msp)+(*mat)(bspn,msp))/2;
     }
…
 }
 
-int MRanTree::FindBestSplitGini(const TArrayI &datasort,const TArrayI &datarang,
-                                const TArrayF &hadtrue,const TArrayI &idclass,
-                                Int_t ndstart,Int_t ndend, TArrayF &tclasspop,
-                                float &mean, float &square, Int_t &msplit,
-                                Float_t &decsplit,Int_t &nbest, const TArrayF &winbag,
+int MRanTree::FindBestSplitGini(const MArrayI &datasort,const MArrayI &datarang,
+                                const MArrayF &hadtrue,const MArrayI &idclass,
+                                Int_t ndstart,Int_t ndend, const MArrayF &tclasspop,
+                                const Float_t &mean, const Float_t &square, Int_t &msplit,
+                                Float_t &decsplit,Int_t &nbest, const MArrayF &winbag,
                                 const int nclass)
 {
…
     const Int_t numdata = (nrnodes-1)/2;
     const Int_t mdim    = fGiniDec.GetSize();
-
-    TArrayF wr(nclass); wr.Reset(0);// right node
 
     // For the best split, msplit is the index of the variable (e.g Hillas par.,
…
         Double_t rld=0;
 
-        TArrayF wl(nclass); wl.Reset(0.);// left node //nclass
-        wr = tclasspop;
+        MArrayF wl(nclass);    // left node //nclass
+        MArrayF wr(tclasspop); // right node//nclass
 
         Double_t critvar=-1.0e20;
…
             const Double_t crit=(rln/rld)+(rrn/rrd);
 
-
-
             if (crit<=critvar) continue;
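In FindBestSplitGini the per-class weights wl and wr are now plain MArrayF objects, with wr initialised from tclasspop so that all events start in the right node; walking through the events in sorted order, each one is moved to the left and the criterion crit=(rln/rld)+(rrn/rrd) is re-evaluated. A toy stand-alone version of that incremental search, maximising the sum of squared class weights over the total weight on each side, which is equivalent to minimising the weighted Gini impurity (data and names here are illustrative, not taken from the Mars code):

    #include <cstdio>
    #include <utility>
    #include <vector>

    int main()
    {
        const int nclass = 2;
        // events already sorted by the split variable: {weight, class}
        const std::vector<std::pair<double,int>> ev =
            { {1.,0}, {1.,0}, {1.,1}, {1.,0}, {1.,1}, {1.,1} };

        std::vector<double> wl(nclass, 0.), wr(nclass, 0.);
        for (const auto &e : ev) wr[e.second] += e.first;     // everything starts on the right

        double best = -1e20; int nbest = -1;
        for (size_t n = 0; n+1 < ev.size(); n++)
        {
            wl[ev[n].second] += ev[n].first;                  // move event n to the left node
            wr[ev[n].second] -= ev[n].first;

            double rln=0, rld=0, rrn=0, rrd=0;
            for (int c = 0; c < nclass; c++)
            {
                rln += wl[c]*wl[c];  rld += wl[c];
                rrn += wr[c]*wr[c];  rrd += wr[c];
            }
            const double crit = rln/rld + rrn/rrd;            // larger is better
            if (crit > best) { best = crit; nbest = int(n); }
        }
        std::printf("best split after sorted event %d, crit=%.3f\n", nbest, best);
        return 0;
    }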
…
 }
 
-int MRanTree::FindBestSplitSigma(const TArrayI &datasort,const TArrayI &datarang,
-                                 const TArrayF &hadtrue, const TArrayI &idclass,
-                                 Int_t ndstart,Int_t ndend, TArrayF &tclasspop,
-                                 float &mean, float &square, Int_t &msplit,
-                                 Float_t &decsplit,Int_t &nbest, const TArrayF &winbag,
+int MRanTree::FindBestSplitSigma(const MArrayI &datasort,const MArrayI &datarang,
+                                 const MArrayF &hadtrue, const MArrayI &idclass,
+                                 Int_t ndstart,Int_t ndend, const MArrayF &tclasspop,
+                                 const Float_t &mean, const Float_t &square, Int_t &msplit,
+                                 Float_t &decsplit,Int_t &nbest, const MArrayF &winbag,
                                  const int nclass)
 {
…
     const Int_t numdata = (nrnodes-1)/2;
     const Int_t mdim    = fGiniDec.GetSize();
-
-    float wr=0;// right node
 
     // For the best split, msplit is the index of the variable (e.g Hillas par., zenith angle ,...)
…
     for (Int_t mt=0; mt<fNumTry; mt++)
     {
-        const Int_t mvar= Int_t(gRandom->Rndm()*mdim);
+        const Int_t mvar= gRandom->Integer(mdim);
         const Int_t mn  = mvar*numdata;
 
         Double_t rrn=0, rrd=0, rln=0, rld=0;
-        Double_t esumr=0, esuml=0, e2sumr=0,e2suml=0;
-
-        esumr =mean;
-        e2sumr=square;
-        esuml =0;
-        e2suml=0;
+
+        Double_t esumr =mean;
+        Double_t e2sumr=square;
+        Double_t esuml =0;
+        Double_t e2suml=0;
 
         float wl=0.;// left node
-        wr = tclasspop[0];
+        float wr=tclasspop[0]; // right node
 
         Double_t critvar=critmin;
…
 }
 
-void MRanTree::MoveData(TArrayI &datasort,Int_t ndstart, Int_t ndend,
-                        TArrayI &idmove,TArrayI &ncase,Int_t msplit,
+void MRanTree::MoveData(MArrayI &datasort,Int_t ndstart, Int_t ndend,
+                        MArrayI &idmove,MArrayI &ncase,Int_t msplit,
                         Int_t nbest,Int_t &ndendl)
 {
…
     const Int_t mdim = fGiniDec.GetSize();
 
-    TArrayI tdatasort(numdata); tdatasort.Reset(0);
+    MArrayI tdatasort(numdata);
 
     // compute idmove = indicator of case nos. going left
…
 }
 
-void MRanTree::BuildTree(TArrayI &datasort,const TArrayI &datarang, const TArrayF &hadtrue,
-                         const TArrayI &idclass, TArrayI &bestsplit, TArrayI &bestsplitnext,
-                         TArrayF &tclasspop, float &tmean, float &tsquare, const TArrayF &winbag,
+void MRanTree::BuildTree(MArrayI &datasort,const MArrayI &datarang, const MArrayF &hadtrue,
+                         const MArrayI &idclass, MArrayI &bestsplit, MArrayI &bestsplitnext,
+                         MArrayF &tclasspop, const Float_t &tmean, const Float_t &tsquare, const MArrayF &winbag,
                          Int_t ninbag, const int nclass)
 {
…
     const Int_t numdata = (nrnodes-1)/2;
 
-    TArrayI nodepop(nrnodes);   nodepop.Reset(0);
-    TArrayI nodestart(nrnodes); nodestart.Reset(0);
-    TArrayI parent(nrnodes);    parent.Reset(0);
-
-    TArrayI ncase(numdata);  ncase.Reset(0);
-    TArrayI idmove(numdata); idmove.Reset(0);
-    TArrayI iv(mdim);        iv.Reset(0);
-
-    TArrayF classpop(nrnodes*nclass); classpop.Reset(0.);//nclass
-    TArrayI nodestatus(nrnodes); nodestatus.Reset(0);
+    MArrayI nodepop(nrnodes);
+    MArrayI nodestart(nrnodes);
+    MArrayI parent(nrnodes);
+
+    MArrayI ncase(numdata);
+    MArrayI idmove(numdata);
+    MArrayI iv(mdim);
+
+    MArrayF classpop(nrnodes*nclass);//nclass
+    MArrayI nodestatus(nrnodes);
 
     for (Int_t j=0;j<nclass;j++)
         classpop[j*nrnodes+0]=tclasspop[j];
 
-    TArrayF mean(nrnodes);   mean.Reset(0.);
-    TArrayF square(nrnodes); square.Reset(0.);
+    MArrayF mean(nrnodes);
+    MArrayF square(nrnodes);
 
     mean[0]=tmean;
…
         tclasspop[j]=classpop[j*nrnodes+kbuild];
 
-    tmean=mean[kbuild];
-    tsquare=square[kbuild];
-
     Int_t msplit, nbest;
     Float_t decsplit=0;
 
-    if ((*this.*FindBestSplit)(datasort,datarang,hadtrue,idclass,ndstart,
-                               ndend, tclasspop, tmean, tsquare,msplit,decsplit,
+    if ((this->*FindBestSplit)(datasort,datarang,hadtrue,idclass,ndstart,
+                               ndend, tclasspop,mean[kbuild],square[kbuild],msplit,decsplit,
                                nbest,winbag,nclass))
     {
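BuildTree dispatches to FindBestSplitGini or FindBestSplitSigma through the pointer-to-member-function FindBestSplit; the change from (*this.*FindBestSplit)(...) to (this->*FindBestSplit)(...) is purely syntactic, both spellings invoke the currently selected splitter. A small self-contained sketch of that dispatch pattern (Splitter, UseGini and UseSigma are hypothetical names):

    #include <cstdio>

    class Splitter
    {
    public:
        // pointer to the member function that implements the split criterion
        int (Splitter::*FindBestSplit)(int) const;

        int FindBestSplitGini(int n)  const { return n % 2; }   // stand-in for classification
        int FindBestSplitSigma(int n) const { return n % 3; }   // stand-in for regression

        void UseGini()  { FindBestSplit = &Splitter::FindBestSplitGini;  }
        void UseSigma() { FindBestSplit = &Splitter::FindBestSplitSigma; }

        int Split(int n) const
        {
            // both spellings are equivalent:
            //   (this->*FindBestSplit)(n)   and   ((*this).*FindBestSplit)(n)
            return (this->*FindBestSplit)(n);
        }
    };

    int main()
    {
        Splitter s;
        s.UseGini();
        std::printf("gini:  %d\n", s.Split(7));
        s.UseSigma();
        std::printf("sigma: %d\n", s.Split(7));
        return 0;
    }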
trunk/MagicSoft/Mars/mranforest/MRanTree.h
(r7424 → r7693)

 class TRandom;
 
+class MArrayI;
+class MArrayF;
+
 class MRanTree : public MParContainer
 {
…
     int (MRanTree::*FindBestSplit)
-        (const TArrayI &, const TArrayI &, const TArrayF &, const TArrayI &,
-         Int_t, Int_t, TArrayF &, float &, float &, Int_t &, Float_t &,
-         Int_t &, const TArrayF &, const int); //!
+        (const MArrayI &, const MArrayI &, const MArrayF &, const MArrayI &,
+         Int_t, Int_t, const MArrayF &, const Float_t &, const Float_t &, Int_t &, Float_t &,
+         Int_t &, const MArrayF &, const int); //!
 
-    int FindBestSplitGini(const TArrayI &datasort, const TArrayI &datarang,
-                          const TArrayF &hadtrue, const TArrayI &idclass,
-                          Int_t ndstart, Int_t ndend, TArrayF &tclasspop,
-                          float &mean, float &square, Int_t &msplit,
-                          Float_t &decsplit, Int_t &nbest, const TArrayF &winbag,
+    int FindBestSplitGini(const MArrayI &datasort, const MArrayI &datarang,
+                          const MArrayF &hadtrue, const MArrayI &idclass,
+                          Int_t ndstart, Int_t ndend, const MArrayF &tclasspop,
+                          const Float_t &mean, const Float_t &square, Int_t &msplit,
+                          Float_t &decsplit, Int_t &nbest, const MArrayF &winbag,
                           const int nclass);
 
-    int FindBestSplitSigma(const TArrayI &datasort, const TArrayI &datarang,
-                           const TArrayF &hadtrue, const TArrayI &idclass,
-                           Int_t ndstart, Int_t ndend, TArrayF &tclasspop,
-                           float &mean, float &square, Int_t &msplit,
-                           Float_t &decsplit, Int_t &nbest, const TArrayF &winbag,
+    int FindBestSplitSigma(const MArrayI &datasort, const MArrayI &datarang,
+                           const MArrayF &hadtrue, const MArrayI &idclass,
+                           Int_t ndstart, Int_t ndend, const MArrayF &tclasspop,
+                           const Float_t &mean, const Float_t &square, Int_t &msplit,
+                           Float_t &decsplit, Int_t &nbest, const MArrayF &winbag,
                            const int nclass);
 
-    void MoveData(TArrayI &datasort, Int_t ndstart, Int_t ndend,
-                  TArrayI &idmove, TArrayI &ncase, Int_t msplit,
+    void MoveData(MArrayI &datasort, Int_t ndstart, Int_t ndend,
+                  MArrayI &idmove, MArrayI &ncase, Int_t msplit,
                   Int_t nbest, Int_t &ndendl);
 
-    void BuildTree(TArrayI &datasort, const TArrayI &datarang, const TArrayF &hadtrue,
-                   const TArrayI &idclass, TArrayI &bestsplit, TArrayI &bestsplitnext,
-                   TArrayF &tclasspop, float &tmean, float &tsquare, const TArrayF &winbag,
+    void BuildTree(MArrayI &datasort, const MArrayI &datarang, const MArrayF &hadtrue,
+                   const MArrayI &idclass, MArrayI &bestsplit, MArrayI &bestsplitnext,
+                   MArrayF &tclasspop, const Float_t &tmean, const Float_t &tsquare, const MArrayF &winbag,
                    Int_t ninbag, const int nclass);
…
     // functions used in tree growing process
-    void GrowTree(TMatrix *mat, const TArrayF &hadtrue, const TArrayI &idclass,
-                  TArrayI &datasort, const TArrayI &datarang, TArrayF &tclasspop,
-                  float &mean, float &square, TArrayI &jinbag, const TArrayF &winbag,
+    void GrowTree(TMatrix *mat, const MArrayF &hadtrue, const MArrayI &idclass,
+                  MArrayI &datasort, const MArrayI &datarang, MArrayF &tclasspop,
+                  const Float_t &mean, const Float_t &square, const MArrayI &jinbag, const MArrayF &winbag,
                   const int nclass);
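Both headers now forward-declare MArrayI and MArrayF instead of including their definitions, which is sufficient because the declarations only use references to these types. A minimal single-file illustration of why a forward declaration is enough in such an interface (Gadget and Widget are made-up stand-ins):

    #include <cstdio>

    // A forward declaration suffices where only references or pointers to the
    // type appear in an interface; the full definition is needed only at the
    // point of use.
    class Gadget;                           // forward declaration (as in MRanTree.h)

    class Widget
    {
    public:
        double Sum(const Gadget &g) const;  // declared with the incomplete type
    };

    class Gadget                            // full definition, normally in its own header
    {
    public:
        double a, b;
    };

    double Widget::Sum(const Gadget &g) const { return g.a + g.b; }

    int main()
    {
        Gadget g{1., 2.};
        Widget w;
        std::printf("%.1f\n", w.Sum(g));
        return 0;
    }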