copied files from sf-dap repository

Dmitry Ozerov
2023-08-14 12:52:08 +02:00
parent a05be1c8b8
commit 307202e657
10 changed files with 857 additions and 0 deletions

ap/ap.py Normal file

@@ -0,0 +1,221 @@
import argparse
import time
import os
import json
from datetime import datetime
from subprocess import Popen
from random import choice
from glob import glob
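# Scan the pgroup's raw data for new runs: first post per-run metadata to the
# Google Sheets logbook, then (unless a NO_INDEXING file is present) submit
# CrystFEL indexing jobs for the dark/light frame lists via sbatch.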
parser = argparse.ArgumentParser()
parser.add_argument("--beamline", default="alvra", help="beamline")
parser.add_argument("--pgroup", default="p18674", help="pgroup")
parser.add_argument("--detector", default="JF06T08V02", help="detector")
parser.add_argument("--logbook", default=None, help="url to logbook")
parser.add_argument("--online_hits_threshold", default = 15, type=int, help="number of peaks to consider frame a hit")
args = parser.parse_args()
beamline = args.beamline
pgroup = args.pgroup
detector = args.detector
online_hits_threshold = args.online_hits_threshold
logbook_url = args.logbook
#credential_files = ["credentials-3.json", "credentials-1.json", "credentials-2.json"]
credential_files = glob("credentials-*.json")
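# several service-account credential files: one is picked at random for each
# update (choice() below), presumably to spread the Google Sheets API quota
# across accounts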
raw_directory = f'/sf/{beamline}/data/{pgroup}/raw'
if not os.path.exists(raw_directory):
    print(f'raw directory {raw_directory} does not exist: check beamline/pgroup or permissions')
    exit()
last_run_file = f'{raw_directory}/run_info/LAST_RUN'
if not os.path.exists(last_run_file):
    print(f'last run file {last_run_file} does not exist')
    last_run=0
    #exit()
else:
    with open(last_run_file, "r") as run_file:
        last_run = int(run_file.read())
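# first pass, newest run first: create a logbook entry for every run that has
# no output/run*.base marker file yet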
for run in range(last_run,0,-1):
#for run in range(last_run,360,-1):
    data_directory = glob(f'{raw_directory}/run{run:04}*')
    if len(data_directory) != 1:
        continue
    data_directory = data_directory[0]
    for run_info_file in glob(f'{data_directory}/meta/acq*'):
        if not os.path.exists(run_info_file):
            continue
        try:
            with open(run_info_file) as json_file:
                run_parameters = json.load(json_file)
        except:
            continue
        unique_run_number = run_parameters["unique_acquisition_run_number"]
        log_file = f'output/run{unique_run_number:06}.base'
        if os.path.exists(log_file):
            continue
        start_pulse_id = run_parameters["start_pulseid"]
        stop_pulse_id = run_parameters["stop_pulseid"]
        rate_multiplicator = run_parameters.get("rate_multiplicator", 1)
        detector_rate = 100//rate_multiplicator
        user_tag = run_parameters.get("user_tag", None)
        run_number = run_parameters["run_number"]
        acq_number = run_parameters.get("acquisition_number", None)
        cell_name = run_parameters.get("cell_name", "")
        if cell_name == "":
            cell_name = "no_cell"
        motor_name = None
        motor_value = None
        if "scan_info" in run_parameters:
            motor_name = run_parameters["scan_info"].get("name", None)
            motor_value = run_parameters["scan_info"].get("scan_readbacks", None)
        request_time = run_parameters.get("request_time", str(datetime.now()))
        try:
            request_time = datetime.strptime(request_time, '%Y-%m-%d %H:%M:%S.%f')
        except:
            request_time = datetime.strptime(request_time, '%Y-%m-%d %H:%M:%S')
        now = datetime.now()
        if (now-request_time).total_seconds() < 30:
            time.sleep(30)
        trun = request_time.strftime('%m-%d_%H:%M')
        process_log_file = open(f'output/run{unique_run_number:06}.base.out', 'w')
        f_log = open(log_file, "w")
        credential_file = choice(credential_files)
        log_run = f'python /sf/jungfrau/applications/sf-dap/sfx/processing/update-spreadsheet-per-run.py --url {logbook_url} --unique_run {unique_run_number} --run_number {run_number} --acq_number {acq_number} --user_tag {user_tag} --cell_name {cell_name} --time_run_taken {trun} --motor_name {motor_name} --motor_value {motor_value} --credentials {credential_file}'
        process = Popen(log_run, shell=True, stdout=process_log_file, stderr=process_log_file)
        print(log_run, file=f_log)
        f_log.close()
        process_log_file.close()
        time.sleep(3)
if os.path.exists("NO_INDEXING"):
print("NO_INDEXING file present, do not run indexing")
exit()
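# second pass, oldest run first: for each acquisition with a dark/light frame
# list, compute online hit rates from the .dap file and submit an indexing job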
for run in range(1, last_run+1):
    data_directory = glob(f'{raw_directory}/run{run:04}*')
    if len(data_directory) != 1:
        continue
    data_directory = data_directory[0]
    for run_info_file in glob(f'{data_directory}/meta/acq*'):
        if not os.path.exists(run_info_file):
            continue
        try:
            with open(run_info_file) as json_file:
                run_parameters = json.load(json_file)
        except:
            continue
        unique_run_number = run_parameters["unique_acquisition_run_number"]
        for laser in ["dark", "light"]:
            log_file = f'output/run{unique_run_number:06}.index.{laser}'
            if os.path.exists(log_file):
                continue
            dir_name = data_directory.split("/")[-1]
            acq_number = run_parameters.get("acquisition_number", None)
            user_tag = run_parameters.get("user_tag", None)
            cell_name = run_parameters.get("cell_name", "")
            if cell_name == "":
                cell_name = "no_cell"
            start_pulse_id = run_parameters["start_pulseid"]
            stop_pulse_id = run_parameters["stop_pulseid"]
            dap_file = f"{data_directory}/data/acq{acq_number:04}.{detector}.dap"
            frame_list_file = f'{data_directory}/data/acq{acq_number:04}.{detector}.{laser}.lst'
            if os.path.exists(frame_list_file):
                all_dark = []
                all_light = []
                hits_dark = []
                hits_light = []
                if os.path.exists(dap_file):
                    with open(dap_file, 'r') as hits_file:
                        all_lines = hits_file.read().splitlines()
                    for line in all_lines:
                        lp = line.split()
                        pulseid, frame_good, n_peaks_online, laser_on = lp[0], lp[1], lp[2], lp[3]
                        pulseid = int(pulseid)
                        if pulseid < start_pulse_id or pulseid > stop_pulse_id:
                            continue
                        laser_on = (laser_on == "True")
                        n_peaks_online = int(n_peaks_online)
                        if laser_on:
                            all_light.append(pulseid)
                        else:
                            all_dark.append(pulseid)
                        if n_peaks_online >= online_hits_threshold:
                            if laser_on:
                                hits_light.append(pulseid)
                            else:
                                hits_dark.append(pulseid)
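                # the +0.01 avoids division by zero when no frames were recorded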
                hitrate_dark = len(hits_dark)/(len(all_dark)+0.01)
                hitrate_light = len(hits_light)/(len(all_light)+0.01)
                if laser == "dark":
                    hitrate_value = hitrate_dark
                else:
                    hitrate_value = hitrate_light
                nframes = 0
                with open(frame_list_file, "r") as fr_file:
                    nframes = len(fr_file.readlines())
                process_log_file = open(f'output/run{unique_run_number:06}.index.{laser}.out', 'w')
                f_log = open(log_file, "w")
                credential_file = choice(credential_files)
                log_laser = f'python /sf/jungfrau/applications/sf-dap/sfx/processing/update-spreadsheet-per-run.py --url {logbook_url} --unique_run {unique_run_number} --number_frames {nframes} --hits_rate {hitrate_value} --laser {laser} --credentials {credential_file}'
                process = Popen(log_laser, shell=True, stdout=process_log_file, stderr=process_log_file)
                print(log_laser, file=f_log)
                #slurm_partition = choice(["prod-aramis", "prod-aramis", "prod-aramis", "prod-athos"])
                slurm_partition = choice(["prod-aramis", "prod-aramis", "prod-aramis"])
                #log_index = f'sbatch --exclusive -p {slurm_partition} -J {unique_run_number}_{laser} -e output/run{unique_run_number:06}.index.{laser}.slurm.err -o output/run{unique_run_number:06}.index.{laser}.slurm.out /sf/jungfrau/applications/sf-dap/sfx/processing/index_data.sh {dir_name}/index/{laser} {acq_number} {frame_list_file} {user_tag} {unique_run_number}'
                log_index = f'sbatch --exclusive -p {slurm_partition} -J {unique_run_number}_{laser} -e output/run{unique_run_number:06}.index.{laser}.slurm.err -o output/run{unique_run_number:06}.index.{laser}.slurm.out /sf/jungfrau/applications/sf-dap/sfx/processing/index_data.sh {dir_name}/index/{laser} {acq_number} {frame_list_file} {cell_name} {unique_run_number}'
                print(log_index, file=f_log)
                process = Popen(log_index, shell=True, stdout=process_log_file, stderr=process_log_file)
                f_log.close()

ap/update-spreadsheet.py Normal file

@@ -0,0 +1,229 @@
import argparse
import gspread
import time
import os
# from: https://stackoverflow.com/questions/23861680/convert-spreadsheet-number-to-column-letter
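# e.g. num_to_col_letters(1) == "A", num_to_col_letters(27) == "AA"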
def num_to_col_letters(num):
    letters = ''
    while num:
        mod = (num - 1) % 26
        letters += chr(mod + 65)
        num = (num - 1) // 26
    return ''.join(reversed(letters))
backgroundColorTitle={ "backgroundColor": { "red" : 182./255., "green" : 215./255., "blue" : 168./255.} }
parser = argparse.ArgumentParser()
parser.add_argument("--credentials", default="credentials-1.json", help="json file with credentials for spreadsheet. https://gspread.readthedocs.io/en/latest/oauth2.html")
parser.add_argument("--worksheet", default="Logbook", help="name of worksheet, default Stats")
parser.add_argument("--url", default=None, help="url to spreadsheet")
parser.add_argument("--setup", action='store_true', help="prepare spearsheet for experiment")
parser.add_argument("--unique_run", default=None, type=int, help="unique run number, default None")
parser.add_argument("--laser", default="dark", help="dark or light, default dark")
parser.add_argument("--row", default=2, type=int, help="row number with the names of the variables. default=2")
parser.add_argument("--number_frames", default=None, type=int, help="Number of frames")
parser.add_argument("--hits_rate", default=None, type=float, help="Hits rate")
parser.add_argument("--number_indexed", default=None, type=int, help="Number of indexed")
parser.add_argument("--number_indexed_alternative", default=None, type=int, help="Number of indexed in alternative indexing")
parser.add_argument("--resolution_max", default=None, help="Resolution max")
parser.add_argument("--resolution_min", default=None, help="Resolution min")
parser.add_argument("--resolution_mean", default=None, help="Resolution mean")
parser.add_argument("--user_tag", default=None, help="User tag")
parser.add_argument("--time_run_taken", default=None, help="When run was started")
parser.add_argument("--run_number", default=None, help="run number")
parser.add_argument("--acq_number", default=None, help="acqusition number")
parser.add_argument("--cell_name", default=None, help="Name of cell(protein) to use for automatic indexing")
parser.add_argument("--motor_name", default=None, help="motor name")
parser.add_argument("--motor_value", default=None, help="motor value")
args = parser.parse_args()
if not os.path.isfile(args.credentials):
    print("no %s credential file, exit" % args.credentials)
    exit(1)
possible_row_run_names = ['RUN number', 'Run #']
what_to_insert_laser = {"# Frames": args.number_frames, "HitsRate": args.hits_rate, "indexed #": args.number_indexed,
                        "alternative indexed #": args.number_indexed_alternative,
                        "resolution max": args.resolution_max, "resolution min": args.resolution_min, "resolution mean": args.resolution_mean}
what_to_insert = {"run number": args.run_number, "acq number": args.acq_number, "User tag": args.user_tag, "Cell": args.cell_name, "Time": args.time_run_taken, "Motor name": args.motor_name, "Motor value": args.motor_value}
what_to_insert_in_summary = ["User tag", "First RUN number", "Last RUN number", "index rate dark", "index rate light", "total images light", "indexed images light","total images dark", "indexed images dark", "row number first", "row number last"]
what_to_find = ["User tag", "RUN number", "# Frames light", "indexed # light", "# Frames dark", "indexed # dark"]
lett_col = []
summary = None
itry = 0
ierror = 1
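# open the spreadsheet, retrying once after a short pause if the Google API call fails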
while itry<2 and ierror == 1:
itry += 1
ierror = 0
try:
gc = gspread.service_account(filename=args.credentials)
if args.url:
spreadsheet = gc.open_by_url(args.url)
else:
print("no spreadsheet defined, exit")
exit(1)
worksheet_list = spreadsheet.worksheets()
worksheet_found = False
summary_found = False
for w in worksheet_list:
if w.title == args.worksheet:
worksheet_found = True
if w.title == 'Datasets summary':
summary_found = True
if not worksheet_found:
if not args.setup:
print("worksheet %s not found in spreasheet" % args.worksheet)
exit(1)
worksheet = spreadsheet.add_worksheet(title=args.worksheet,rows="10000",cols="100")
else:
worksheet = spreadsheet.worksheet(args.worksheet)
if not summary_found:
summary = spreadsheet.add_worksheet(title='Datasets summary',rows="100",cols="100")
except Exception as e:
ierror = 1
time.sleep(2)
if ierror == 1:
    print("Error in opening google spreadsheet")
    exit(1)
row_names = []
row_names_summary = []
formula_names_summary = []
try:
    row_names = worksheet.row_values(args.row)
    if summary:
        row_names_summary = summary.row_values(1)
        formula_names_summary = summary.row_values(2)
except:
    pass
row_run = None
for run_row_name in possible_row_run_names:
    if run_row_name in row_names:
        row_run = row_names.index(run_row_name)+1
if row_run is None and args.setup:
    row_run = len(row_names)+1
    row_run_name = num_to_col_letters(row_run)
    worksheet.update_cell(args.row, row_run, possible_row_run_names[0])
    worksheet.format(f'{row_run_name}{args.row}', backgroundColorTitle)
    N_RUNS_INITIAL=5000
    r_ins = [ [i] for i in range(1,1+N_RUNS_INITIAL+1)]
    worksheet.update(f'{row_run_name}{args.row+1}:{row_run_name}{args.row+1+N_RUNS_INITIAL}', r_ins)
    time.sleep(10)
if row_run is not None:
    column_names = worksheet.col_values(row_run)
else:
    print("Cannot find the column with the pre-defined run numbers")
    exit()
if args.setup:
    for wht in what_to_insert:
        if wht not in row_names:
            wht_col = len(worksheet.row_values(args.row))+1
            worksheet.update_cell(args.row, wht_col, wht)
            wht_col_name = num_to_col_letters(wht_col)
            worksheet.format(f'{wht_col_name}{args.row}', backgroundColorTitle)
            time.sleep(10)
    for laser in ['dark', 'light']:
        for wht in what_to_insert_laser:
            wht = wht + " " + laser
            if wht not in row_names:
                wht_col = len(worksheet.row_values(args.row))+1
                worksheet.update_cell(args.row, wht_col, wht)
                wht_col_name = num_to_col_letters(wht_col)
                worksheet.format(f'{wht_col_name}{args.row}', backgroundColorTitle)
                time.sleep(10)
    if summary:
        for index, wht in enumerate(what_to_insert_in_summary):
            if wht not in row_names_summary:
                summary.update_cell(1, index+1, wht)
                if index < len(what_to_insert_in_summary)-2:
                    wht_col_name = num_to_col_letters(index+1)
                    summary.format(f'{wht_col_name}1', backgroundColorTitle)
        for wht in what_to_find:
            cell = worksheet.find(wht)
            lett_col.append(num_to_col_letters(cell.col))
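        # summary formulas: columns J/K hold the first/last worksheet row of each
        # dataset; INDIRECT pulls the user tag and run numbers from those rows,
        # SUM totals frames/indexed images over the row range, and the rate
        # columns divide indexed by total (+0.001 guards against division by zero)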
        formulas_in_summary = ["=INDIRECT(\"" + args.worksheet + "!\"&\"" + lett_col[0] + "\"&J2 &\"\")",
                               "=INDIRECT(\"" + args.worksheet + "!\"&\"" + lett_col[1] + "\"&J2 &\"\")",
                               "=INDIRECT(\"" + args.worksheet + "!\"&\"" + lett_col[1] + "\"&K2 &\"\")",
                               "=I2/(H2+0.001)",
                               "=G2/(F2+0.001)",
                               "=SUM(INDIRECT(\"" + args.worksheet + "!\"&\"" + lett_col[2] + "\"&J2 &\"\"):INDIRECT(\"" + args.worksheet + "!\"&\"" + lett_col[2] + "\"&K2 &\"\"))",
                               "=SUM(INDIRECT(\"" + args.worksheet + "!\"&\"" + lett_col[3] + "\"&J2 &\"\"):INDIRECT(\"" + args.worksheet + "!\"&\"" + lett_col[3] + "\"&K2 &\"\"))",
                               "=SUM(INDIRECT(\"" + args.worksheet + "!\"&\"" + lett_col[4] + "\"&J2 &\"\"):INDIRECT(\"" + args.worksheet + "!\"&\"" + lett_col[4] + "\"&K2 &\"\"))",
                               "=SUM(INDIRECT(\"" + args.worksheet + "!\"&\"" + lett_col[5] + "\"&J2 &\"\"):INDIRECT(\"" + args.worksheet + "!\"&\"" + lett_col[5] + "\"&K2 &\"\"))"]
        time.sleep(10)
        for index, wht in enumerate(formulas_in_summary):
            if wht not in formula_names_summary:
                summary.update_cell(2, index+1, wht)
    exit()
if not args.unique_run and not args.setup:
    print("a run number must be provided with --unique_run")
    exit(1)
run_number = str(args.unique_run)
if run_number in column_names:
    column_index = column_names.index(run_number)+1
else:
    print("Run %s not found in spreadsheet" % run_number)
    exit()
for wht in what_to_insert_laser:
    if what_to_insert_laser[wht] is not None:
        name = wht+" "+args.laser
        if name in row_names:
            insert_row = row_names.index(name)+1
        else:
            print("%s not found in the table" % name)
            exit()
        worksheet.update_cell(column_index, insert_row, what_to_insert_laser[wht])
for wht in what_to_insert:
    if what_to_insert[wht] is not None:
        name = wht
        if name in row_names:
            insert_row = row_names.index(name)+1
        else:
            print("%s not found in the table" % name)
            exit()
        worksheet.update_cell(column_index, insert_row, what_to_insert[wht])

scripts/ap.sh Executable file

@@ -0,0 +1,50 @@
#!/bin/bash
. ./env_setup.sh
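# daemon loop: discover new runs and submit work, replay logged spreadsheet
# commands whose previous attempt left output (i.e. apparently failed),
# snapshot the SLURM queue into CURRENT_JOBS.txt, then sleep two minutes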
while true
do
echo "Running "`date`
python /sf/jungfrau/applications/sf-dap/sfx/processing/automatic2.py --pgroup ${PGROUP} --beamline ${BEAMLINE} --detector ${DETN} --logbook ${LOGBOOK}
cd output
for i in `ls *base.out 2>/dev/null`
do
if [ -s $i ]
then
f=`echo $i | sed 's/\.out//'`
cp $f a.sh
chmod +x a.sh
cd ..
output/a.sh > output/$i.out 2>&1
cd output
rm -rf a.sh
rm -rf $i
if [ -s $i.out ]
then
mv $i.out $i
else
rm -rf $i.out
fi
else
rm -rf $i
fi
done
cd ..
rm -rf CURRENT_JOBS.txt
echo " statistics at "`date` >> CURRENT_JOBS.txt
echo " Running jobs " >> CURRENT_JOBS.txt
squeue | grep " R " | awk '{print $3" "$6}' >> CURRENT_JOBS.txt
echo " " >> CURRENT_JOBS.txt
echo " Pending jobs " >> CURRENT_JOBS.txt
squeue | grep " PD " | awk '{print $3}' >> CURRENT_JOBS.txt
cd output
/sf/jungfrau/applications/sf-dap/sfx/processing/wip/re-insert-spearsheet.sh
cd ..
/sf/jungfrau/applications/sf-dap/sfx/processing/wip/re-insert-spearsheet.2.sh
echo "Sleeping "`date`
sleep 120
done

scripts/env_setup.sh Executable file

@@ -0,0 +1,28 @@
#!/bin/bash
DIRNAME=`dirname ${BASH_SOURCE}`
#export PGROUP=`cat ${DIRNAME}/PGROUP`
export PGROUP=...
export BEAMLINE=alvra
# alvra bernina cristallina furka maloja
BASEDIR=/sf/${BEAMLINE}/data/${PGROUP}/res
source /sf/jungfrau/applications/miniconda3/etc/profile.d/conda.sh
conda activate sf-dap
export DETN=JF06T08V03
# JF06T08V03 JF06T32V03 JF17T16V01
export GEOM_FILE=${DETN}.geom
# for Alvra (rounding 0.25): 4000 and lower
# for Cristallina (photon counts): 10
# edit run_index.*.sh file(s) for indexing options
export THRESHOLD_INDEXING=10
#LOGBOOK="https://docs.google.com/spreadsheets/...."
SRCDIR=$PWD

scripts/index_data.sh Normal file

@@ -0,0 +1,140 @@
#!/bin/bash
##SBATCH -p hour
##SBATCH -t 1:00:00
#SBATCH -e output/index_data.%j.err
#SBATCH -o output/index_data.%j.out
##SBATCH -x sf-cn-[5-7]
. ./env_setup.sh
DIRN=$1
RUN=$2
FILELST=$3
PROTEIN_NAME=$4
UNIQUERUN=$5
OUTDIR=`echo ${DIRN} | sed 's/data/index/'`
FR=`printf "%04d" ${RUN}`
FILEN=acq${FR}
RUNNR=`echo ${DIRN} | awk -F "/" '{print $1}' | awk -F "-" '{print $1}'`
hostname
if [ -e ${FILELST} ]
then
mkdir -p ${BASEDIR}/${OUTDIR}
cd ${BASEDIR}/${OUTDIR}
. /sf/jungfrau/applications/sf-dap/sfx/processing/load_crystfel.sh
cp ${SRCDIR}/${GEOM_FILE} ${FILEN}.geom
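# DETECTOR_DISTANCE.txt and BEAM_ENERGY.txt hold one "<runNNNN> <value>" entry
# per line plus a DEFAULT fallback (seeded by prepare.sh); patch the geometry
# file with the per-run value when one exists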
grep ${RUNNR} ${SRCDIR}/DETECTOR_DISTANCE.txt > /dev/null
if [ $? = 0 ]
then
DETECTOR_DISTANCE=`grep ${RUNNR} ${SRCDIR}/DETECTOR_DISTANCE.txt | tail -1 | awk '{print $2}'`
else
DETECTOR_DISTANCE=`grep DEFAULT ${SRCDIR}/DETECTOR_DISTANCE.txt | tail -1 | awk '{print $2}'`
fi
sed -i "s:clen.*:clen = ${DETECTOR_DISTANCE}:g" ${FILEN}.geom
grep ${RUNNR} ${SRCDIR}/BEAM_ENERGY.txt > /dev/null
if [ $? = 0 ]
then
BEAM_ENERGY=`grep ${RUNNR} ${SRCDIR}/BEAM_ENERGY.txt | tail -1 | awk '{print $2}'`
else
BEAM_ENERGY=`grep DEFAULT ${SRCDIR}/BEAM_ENERGY.txt | tail -1 | awk '{print $2}'`
fi
sed -i "s:photon_energy.*:photon_energy = ${BEAM_ENERGY}:g" ${FILEN}.geom
if [ -e ${SRCDIR}/CELL/${PROTEIN_NAME}.cell ]
then
cp ${SRCDIR}/CELL/${PROTEIN_NAME}.cell ${FILEN}.cell
fi
rm -rf ${FILEN}.stream
if [ -e ${SRCDIR}/run_index.${PROTEIN_NAME}.sh ]
then
${SRCDIR}/run_index.${PROTEIN_NAME}.sh ${FILELST} ${FILEN}.geom ${FILEN}.cell ${FILEN}.stream > ${FILEN}.out 2>${FILEN}.err
else
${SRCDIR}/run_index.sh ${FILELST} ${FILEN}.geom ${FILEN}.cell ${FILEN}.stream > ${FILEN}.out 2>${FILEN}.err
fi
module purge
laser=dark
echo ${FILELST} | grep light.lst > /dev/null
if [ $? = 0 ]
then
laser=light
fi
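# indexamajig prints a "Final: ..." summary on stderr; field 8 of that line
# is taken as the number of indexed images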
grep Final ${FILEN}.err > /dev/null
if [ $? = 0 ]
then
n_indexed=`grep Final ${FILEN}.err | awk '{print $8}'`
echo python /sf/jungfrau/applications/sf-dap/sfx/processing/update-spreadsheet-per-run.py --url ${LOGBOOK} --unique_run ${UNIQUERUN} --laser ${laser} --number_indexed ${n_indexed} --credentials ${SRCDIR}/credentials.json
python /sf/jungfrau/applications/sf-dap/sfx/processing/update-spreadsheet-per-run.py --url ${LOGBOOK} --unique_run ${UNIQUERUN} --laser ${laser} --number_indexed ${n_indexed} --credentials ${SRCDIR}/credentials.json
fi
/sf/jungfrau/applications/sf-dap/sfx/processing/ave-resolution ${FILEN}.stream > ${FILEN}.resolution.tmp
if [ $? == 0 ]
then
mean_res=`grep Mean ${FILEN}.resolution.tmp | awk '{print $5}'`
best_res=`grep Best ${FILEN}.resolution.tmp | awk '{print $5}'`
worst_res=`grep Worst ${FILEN}.resolution.tmp | awk '{print $5}'`
echo $mean_res $best_res $worst_res > ${FILEN}.resolution
else
echo "nan nan nan" > ${FILEN}.resolution
fi
read r1 r2 r3 <<< $(cat ${FILEN}.resolution)
echo python /sf/jungfrau/applications/sf-dap/sfx/processing/update-spreadsheet-per-run.py --url ${LOGBOOK} --unique_run ${UNIQUERUN} --laser ${laser} --resolution_min ${r3} --resolution_max ${r2} --resolution_mean ${r1} --credentials ${SRCDIR}/credentials.json
python /sf/jungfrau/applications/sf-dap/sfx/processing/update-spreadsheet-per-run.py --url ${LOGBOOK} --unique_run ${UNIQUERUN} --laser ${laser} --resolution_min ${r3} --resolution_max ${r2} --resolution_mean ${r1} --credentials ${SRCDIR}/credentials.json
if [ -e ${SRCDIR}/CELL/${PROTEIN_NAME}.cell_alternative ]
then
echo "Running alternative cell indexing"
. /sf/jungfrau/applications/sf-dap/sfx/processing/load_crystfel.sh
cp ${SRCDIR}/CELL/${PROTEIN_NAME}.cell_alternative ${FILEN}.cell_alternative
diff ${FILEN}.cell ${FILEN}.cell_alternative > /dev/null
if [ $? != 0 ]
then
rm -rf ${FILEN}.stream_alternative
if [ -e ${SRCDIR}/run_index.${PROTEIN_NAME}.sh ]
then
${SRCDIR}/run_index.${PROTEIN_NAME}.sh ${FILELST} ${FILEN}.geom ${FILEN}.cell_alternative ${FILEN}.stream_alternative > ${FILEN}.out_alternative 2>${FILEN}.err_alternative
else
${SRCDIR}/run_index.sh ${FILELST} ${FILEN}.geom ${FILEN}.cell_alternative ${FILEN}.stream_alternative > ${FILEN}.out_alternative 2>${FILEN}.err_alternative
fi
rm -rf ${FILEN}.cell_alternative
grep Final ${FILEN}.err_alternative > /dev/null
if [ $? = 0 ]
then
n_indexed_alternative=`grep Final ${FILEN}.err_alternative | awk '{print $8}'`
echo python /sf/jungfrau/applications/sf-dap/sfx/processing/update-spreadsheet-per-run.py --url ${LOGBOOK} --unique_run ${UNIQUERUN} --laser ${laser} --number_indexed_alternative ${n_indexed_alternative} --credentials ${SRCDIR}/credentials.json
python /sf/jungfrau/applications/sf-dap/sfx/processing/update-spreadsheet-per-run.py --url ${LOGBOOK} --unique_run ${UNIQUERUN} --laser ${laser} --number_indexed_alternative ${n_indexed_alternative} --credentials ${SRCDIR}/credentials.json
fi
else
echo "Alternative cell is the same as main cell"
fi
fi
rm -rf ${FILEN}.list ${FILEN}.geom ${FILEN}.cell ${FILEN}.resolution.tmp
else
echo "File ${FILELST} doesnt exists"
fi

scripts/load_crystfel.sh Normal file

@@ -0,0 +1,2 @@
module use MX
module load crystfel/0.10.2

scripts/prepare.sh Executable file

@@ -0,0 +1,48 @@
#!/bin/bash
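# one-time setup of a pgroup working directory: create CELL/ and output/,
# copy the env_setup.sh and run_index.sh templates, and seed default
# DETECTOR_DISTANCE.txt and BEAM_ENERGY.txt files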
SRC=/sf/jungfrau/applications/sf-dap/sfx/processing
chmod g+w `pwd`
for d in CELL output
do
if [ ! -d ${d} ]
then
mkdir ${d}
fi
done
for f in env_setup.sh run_index.sh
do
if [ ! -e $f ]
then
cp ${SRC}/$f $f
fi
done
echo "Edit file env_setup.sh with correct information (PGROUP, DETECTOR_NAME, BEAMLINE, THRESHOLD, LOGBOOK_URL)"
echo "Put proper geom file as <DETECTOR_NAME>.geom (e.g. JF17T16V01.geom)"
echo "Copy cell files to CELL/ directory (example : lyso.cell, hewl.cell....)"
echo "Make files credentials.json, credentials-*.json (e.g. credentials-1.json, credentials-2.json) with the api key to access logbook"
if [ ! -e DETECTOR_DISTANCE.txt ]
then
echo "DEFAULT 0.09369" > DETECTOR_DISTANCE.txt
echo Set a good estimate of the detector distance in the DETECTOR_DISTANCE.txt file
fi
if [ ! -e BEAM_ENERGY.txt ]
then
echo "DEFAULT 11330.0" > BEAM_ENERGY.txt
echo Set the correct beam energy in the BEAM_ENERGY.txt file
fi
for f in env_setup.sh run_index.sh DETECTOR_DISTANCE.txt BEAM_ENERGY.txt
do
chmod g+w $f
done
chmod -R g+w CELL

scripts/re-insert-spearsheet.2.sh Executable file

@@ -0,0 +1,25 @@
#!/bin/bash
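# re-run spreadsheet updates whose output logs show Google API errors
# (run from the pgroup top directory; see ap.sh)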
for i in {0001..5000}
do
for l in light dark
do
f=output/run00${i}.index.${l}.out
if [ -e ${f} ]
then
grep -v docs.google ${f} | egrep " google|Cannot find|gspread.exceptions.APIError" > /dev/null
a=$?
if [ $a == 0 ]
then
b=`echo $f | sed 's/\.out//'`
grep python $b | grep credentials > a
if [ -s a ]
then
chmod +x a
./a > $f
fi
fi
fi
done
done

scripts/re-insert-spearsheet.sh Executable file

@@ -0,0 +1,33 @@
#!/bin/bash
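# same idea for the per-run SLURM logs: re-run failed spreadsheet updates,
# scrub the error lines from the log, and keep a copy of each retried
# command in re-try/ (run inside output/; see ap.sh)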
mkdir -p re-try
for i in {0001..5000}
do
for l in light dark
do
f=run00${i}.index.${l}.slurm.out
if [ -e ${f} ]
then
grep -v docs.google ${f} | egrep " google|Cannot find" > /dev/null
a=$?
b=1
if [ -s run00${i}.index.${l}.slurm.err ]
then
b=0
fi
if [ $a == 0 ] || [ $b == 0 ]
then
grep python $f | grep credentials.json > a
if [ -s a ]
then
chmod +x a
./a > run00${i}.index.${l}.slurm.err
grep -v " google" $f | grep -v "Cannot find" > b
mv b $f
mv a re-try/run00${i}.`date +%s`.sh
fi
fi
fi
done
done

scripts/run_index.sh Executable file

@@ -0,0 +1,81 @@
#!/bin/bash
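# usage: run_index.sh [file.lst [geom.geom [cell.cell [stream.stream]]]]
# wraps CrystFEL's indexamajig; positional arguments default to the names above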
NP=`grep processor /proc/cpuinfo | wc -l`
if [ $# -ge 1 ]
then
FILELST=$1
else
FILELST=file.lst
fi
if [ $# -ge 2 ]
then
FILEGEOM=$2
else
FILEGEOM=geom.geom
fi
if [ $# -ge 3 ]
then
FILECELL=$3
else
FILECELL=cell.cell
fi
if [ $# -ge 4 ]
then
FILESTREAM=$4
else
FILESTREAM=stream.stream
fi
# list of indexing methods: xgandalf-latt-cell,asdf-latt-cell,dirax-latt-cell,mosflm-latt-cell
# but usually everything is indexed by xgandalf (99.99%)
# to speed up indexing, one can use: --xgandalf-fast-execution, --highres=1.6 ...
if [ -e ${FILECELL} ]
then
indexamajig -i ${FILELST} \
-o ${FILESTREAM} \
--geometry=${FILEGEOM} \
--pdb=${FILECELL} \
--indexing=xgandalf-latt-cell \
--peaks=peakfinder8 \
--integration=rings-grad \
--tolerance=10.0,10.0,10.0,2,3,2 \
--threshold=${THRESHOLD_INDEXING} \
--min-snr=5 \
--int-radius=2,3,6 \
-j ${NP} \
--no-multi \
--no-retry \
--check-peaks \
--max-res=3000 \
--min-pix-count=1 \
--local-bg-radius=4
else
indexamajig -i ${FILELST} \
-o ${FILESTREAM} \
--geometry=${FILEGEOM} \
--indexing=xgandalf-latt-cell \
--peaks=peakfinder8 \
--integration=rings-grad \
--tolerance=10.0,10.0,10.0,2,2,2 \
--threshold=${THRESHOLD_INDEXING} \
--min-snr=5 \
--int-radius=2,3,6 \
-j ${NP} \
--no-multi \
--no-retry \
--check-peaks \
--max-res=3000 \
--min-pix-count=1 \
--local-bg-radius=4
fi