Skip to content

Commit

Permalink
Merge pull request #784 from Panos512/20240424_adds_ressurection_helper
Browse files Browse the repository at this point in the history
adds scripts used to resurrect dids
  • Loading branch information
dynamic-entropy authored Apr 25, 2024
2 parents ee21a84 + cf8254d commit 1e83e37
Show file tree
Hide file tree
Showing 2 changed files with 131 additions and 0 deletions.
12 changes: 12 additions & 0 deletions helpers/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -60,3 +60,15 @@ curl -X POST --cert $X509_USER_PROXY --key $X509_USER_PROXY --capath /etc/grid-s
```

The document for rest api specification can be found [here](https://cernbox.cern.ch/pdf-viewer/public/vLhBpHDdaXJSqwW/WLCG%20Tape%20REST%20API%20reference%20document.pdf)


### resurrect_dids.py

These are a set of scripts used to resurrect removed dids from the DELETED_DIDS table and then restore them in Rucio.
In order to do that one has to:

1. Resurrect the DIDs
2. Re-link DIDs (attach blocks to datasets and files to blocks)
3. Make sure that the files actually exist in some RSE (possibly using gfal-stat)
4. Manually add a replica that points to the existing file on the RSE found in the previous step

119 changes: 119 additions & 0 deletions helpers/resurrect_dids.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,119 @@
"""
RESURRECT BLOCKS
INPUT: blocks.txt
/DYJetsToLL_M-105To160_VBFFilter_TuneCP5_PSweights_13TeV-amcatnloFXFX-pythia8/RunIIFall18wmLHEGS-VBFPostMGFilter_102X_upgrade2018_realistic_v11_ext1-v1/GEN-SIM#071adca4-e33d-4137-b2f3-a88ceec57df2
/DYJetsToLL_M-105To160_VBFFilter_TuneCP5_PSweights_13TeV-amcatnloFXFX-pythia8/RunIIFall18wmLHEGS-VBFPostMGFilter_102X_upgrade2018_realistic_v11_ext1-v1/GEN-SIM#71034aed-f14b-46e6-9f96-a77aa723f344
/DYJetsToLL_M-105To160_VBFFilter_TuneCP5_PSweights_13TeV-amcatnloFXFX-pythia8/RunIIFall18wmLHEGS-VBFPostMGFilter_102X_upgrade2018_realistic_v11_ext1-v1/GEN-SIM#186e60f2-ad4e-4643-b6b8-3af894f0f37b
"""
from rucio.client import Client
client = Client()

# Build one DATASET DID in scope 'cms' per line of the input file,
# using the whitespace-stripped line as the DID name.
with open("./blocks.txt", "r") as block_listing:
    block_dids = [
        {'scope': 'cms', 'name': entry.strip(), 'type': 'DATASET'}
        for entry in block_listing
    ]  # 313 blocks

# All block DIDs are sent in a single resurrect request.
client.resurrect(dids=block_dids)


"""
RESURRECT FILES (in chunks of 500)
INPUT: files.txt
/store/mc/RunIIFall18wmLHEGS/DYJetsToLL_M-105To160_VBFFilter_TuneCP5_PSweights_13TeV-amcatnloFXFX-pythia8/GEN-SIM/VBFPostMGFilter_102X_upgrade2018_realistic_v11_ext1-v1/90001/17F92391-80F9-9947-A700-6E10C2EFC6BF.root,253165146,bd624b5c
/store/mc/RunIIFall18wmLHEGS/DYJetsToLL_M-105To160_VBFFilter_TuneCP5_PSweights_13TeV-amcatnloFXFX-pythia8/GEN-SIM/VBFPostMGFilter_102X_upgrade2018_realistic_v11_ext1-v1/30000/6A5C5723-9B10-E346-B3E3-92DDEE46793C.root,2052471395,24ca1b15
/store/mc/RunIIFall18wmLHEGS/DYJetsToLL_M-105To160_VBFFilter_TuneCP5_PSweights_13TeV-amcatnloFXFX-pythia8/GEN-SIM/VBFPostMGFilter_102X_upgrade2018_realistic_v11_ext1-v1/30000/BB2A8199-56F7-6943-9A79-E2F0DCDA47FD.root,495981052,8b681234
"""
from rucio.client import Client
client = Client()


def chunk_list(lst, chunk_size):
    """Yield consecutive slices of ``lst`` holding at most ``chunk_size`` items.

    Args:
        lst: Sequence supporting ``len()`` and slicing (e.g. a list).
        chunk_size: Maximum number of items per chunk; must be positive.

    Yields:
        Slices of ``lst`` in order; the last one may be shorter than
        ``chunk_size``.

    Raises:
        ValueError: If ``chunk_size`` is not positive. Because this is a
            generator, the error surfaces when iteration starts.
    """
    if chunk_size <= 0:
        # A negative step makes range() empty, which would silently yield
        # no chunks at all instead of reporting the bad argument.
        raise ValueError("chunk_size must be positive, got %r" % (chunk_size,))
    for start in range(0, len(lst), chunk_size):
        yield lst[start:start + chunk_size]


# Build one FILE DID in scope 'cms' per input line; the DID name is the
# first comma-separated field of the stripped line.
with open("./files.txt", "r") as file_listing:
    file_dids = [
        {'scope': 'cms', 'name': entry.strip().split(',')[0], 'type': 'FILE'}
        for entry in file_listing
    ]  # 4602 files

# Split the list so that each resurrect request carries at most 500 DIDs
chunks = list(chunk_list(file_dids, 500))

for batch in chunks:
    client.resurrect(dids=batch)




"""
ATTACH BLOCKS TO DATASETS and FILES TO BLOCKS
Run this twice, once for each of the two CSV files
INPUT: container_datasets.csv and datasets_files.csv
"/DYJetsToLL_M-105To160_VBFFilter_TuneCP5_PSweights_13TeV-amcatnloFXFX-pythia8/RunIIFall18wmLHEGS-VBFPostMGFilter_102X_upgrade2018_realistic_v11_ext1-v1/GEN-SIM","/DYJetsToLL_M-105To160_VBFFilter_TuneCP5_PSweights_13TeV-amcatnloFXFX-pythia8/RunIIFall18wmLHEGS-VBFPostMGFilter_102X_upgrade2018_realistic_v11_ext1-v1/GEN-SIM#071adca4-e33d-4137-b2f3-a88ceec57df2"
"/DYJetsToLL_M-105To160_VBFFilter_TuneCP5_PSweights_13TeV-amcatnloFXFX-pythia8/RunIIFall18wmLHEGS-VBFPostMGFilter_102X_upgrade2018_realistic_v11_ext1-v1/GEN-SIM","/DYJetsToLL_M-105To160_VBFFilter_TuneCP5_PSweights_13TeV-amcatnloFXFX-pythia8/RunIIFall18wmLHEGS-VBFPostMGFilter_102X_upgrade2018_realistic_v11_ext1-v1/GEN-SIM#71034aed-f14b-46e6-9f96-a77aa723f344"
"/DYJetsToLL_M-105To160_VBFFilter_TuneCP5_PSweights_13TeV-amcatnloFXFX-pythia8/RunIIFall18wmLHEGS-VBFPostMGFilter_102X_upgrade2018_realistic_v11_ext1-v1/GEN-SIM#02a795af-1079-47be-86cc-28c945f737bd","/store/mc/RunIIFall18wmLHEGS/DYJetsToLL_M-105To160_VBFFilter_TuneCP5_PSweights_13TeV-amcatnloFXFX-pythia8/GEN-SIM/VBFPostMGFilter_102X_upgrade2018_realistic_v11_ext1-v1/90001/5537F144-780F-CC4F-9F8E-6570A347FF09.root"
"/DYJetsToLL_M-105To160_VBFFilter_TuneCP5_PSweights_13TeV-amcatnloFXFX-pythia8/RunIIFall18wmLHEGS-VBFPostMGFilter_102X_upgrade2018_realistic_v11_ext1-v1/GEN-SIM#04232855-6131-4790-be27-93b48cbc5b1d","/store/mc/RunIIFall18wmLHEGS/DYJetsToLL_M-105To160_VBFFilter_TuneCP5_PSweights_13TeV-amcatnloFXFX-pythia8/GEN-SIM/VBFPostMGFilter_102X_upgrade2018_realistic_v11_ext1-v1/80000/E2A722AC-A875-E543-96F0-9DFFF7508504.root"
"""
from rucio.client import Client
import csv

client = Client()

# Maps each parent DID name (first CSV column) to the list of child DID
# names (second CSV column) that will be attached to it.
contents = {}

# Path of the CSV file to process; must be filled in before running.
csv_file_path = ''

with open(csv_file_path, mode='r') as csv_handle:
    for record in csv.reader(csv_handle):
        # Drop any double quotes still surrounding the two fields.
        parent_name = record[0].strip('"')
        child_name = record[1].strip('"')
        contents.setdefault(parent_name, []).append(child_name)


for parent_name, child_names in contents.items():
    attachments = [{'scope': 'cms', 'name': child_name} for child_name in child_names]
    # Open the parent, attach all of its children in one call, then close it again.
    client.set_status(scope='cms', name=parent_name, open=True)
    client.attach_dids(scope='cms', name=parent_name, dids=attachments)
    client.set_status(scope='cms', name=parent_name, open=False)


"""
CREATE replicas in T0_CH_CERN_Tape or any other RSE
INPUT: files.txt
/store/mc/RunIIFall18wmLHEGS/DYJetsToLL_M-105To160_VBFFilter_TuneCP5_PSweights_13TeV-amcatnloFXFX-pythia8/GEN-SIM/VBFPostMGFilter_102X_upgrade2018_realistic_v11_ext1-v1/90001/17F92391-80F9-9947-A700-6E10C2EFC6BF.root,253165146,bd624b5c
/store/mc/RunIIFall18wmLHEGS/DYJetsToLL_M-105To160_VBFFilter_TuneCP5_PSweights_13TeV-amcatnloFXFX-pythia8/GEN-SIM/VBFPostMGFilter_102X_upgrade2018_realistic_v11_ext1-v1/30000/6A5C5723-9B10-E346-B3E3-92DDEE46793C.root,2052471395,24ca1b15
/store/mc/RunIIFall18wmLHEGS/DYJetsToLL_M-105To160_VBFFilter_TuneCP5_PSweights_13TeV-amcatnloFXFX-pythia8/GEN-SIM/VBFPostMGFilter_102X_upgrade2018_realistic_v11_ext1-v1/30000/BB2A8199-56F7-6943-9A79-E2F0DCDA47FD.root,495981052,8b681234
"""
from rucio.client import Client
client = Client()


# Register one replica on T0_CH_CERN_Tape per line of files.txt (4602 files).
# Each line has the form "<file name>,<size in bytes>,<adler32 checksum>".
with open("./files.txt", "r") as files:
    for line in files:
        line = line.strip()
        if not line:
            # Tolerate blank lines instead of failing part-way through the
            # run, after some replicas have already been added.
            continue
        # Split once; the original read the checksum into a misspelled name
        # (file_adler35) and then passed the undefined file_adler32.
        file_name, file_size, file_adler32 = line.split(',')[:3]
        client.add_replica(
            rse='T0_CH_CERN_Tape',
            scope='cms',
            name=file_name,
            bytes_=int(file_size),
            adler32=file_adler32,
        )

0 comments on commit 1e83e37

Please sign in to comment.