Integrate longitudinal high-dimensional data in a data warehouse using the git commit graph to store temporal information and git annex to store large data in a content-addressable fashion.
You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

814 lines
22KB

  1. #!/bin/bash
  2. # © Copyright 2024 Maxime Wack
  3. # This file is part of git ommix.
  4. # Git ommix is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation, either version 3 of the License, or (at your option) any later version.
  5. # Git ommix is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
  6. # You should have received a copy of the GNU General Public License along with this program. If not, see <https://www.gnu.org/licenses/>.
  7. ###############################################################################
  8. #
  9. # Add functions
  10. # Add dispatch
  11. function add
  12. {
  13. echo -e "\033[0;32mAdding $OBJECT:$ID\033[0m"
  14. base_prov
  15. case "$OBJECT" in
  16. patient)
  17. add_patient ;;
  18. sample)
  19. validate_patient
  20. add_sample ;;
  21. data)
  22. validate_patient
  23. validate_sample
  24. validate_files
  25. add_data ;;
  26. result)
  27. validate_patient
  28. validate_sample
  29. validate_files
  30. # TODO: marche aussi avec juste un revision_of sans sample ?
  31. # A result object needs to USE a data object
  32. [[ "${#USE[@]}" -gt 0 ]] || die "Please --use data when adding a result"
  33. add_result ;;
  34. diagnosis)
  35. validate_patient
  36. # A diagnosis needs to be at least revision_of another diagnosis, or use results or diagnosis,
  37. [[ "${#USE[@]}" -gt 0 ]] || [[ "$REVISION_OF" ]] || die "Please --use result|diagnosis or --revision_of diagnosis when adding a diagnosis"
  38. add_diagnosis ;;
  39. *) usage $VERB ;;
  40. esac
  41. commit
  42. echo -e "\033[0;32mAdded $OBJECT:$ID\033[0m
  43. "
  44. }
  45. # Add patient
  46. # Uses :
  47. # - ID
  48. function add_patient
  49. {
  50. # Create the patient repo dir and data and result subdirs
  51. dbg mkdir --parents "$ID/data" || die "Unable to create a directory in the git ommix base directory at: $GIT_OMMIX_REPO_DIR
  52. Please check your permissions"
  53. dbg mkdir --parents "$ID/result"
  54. dbg touch "$ID/data/.empty"
  55. dbg touch "$ID/result/.empty"
  56. # Go to the patient repo
  57. dbg cd "$ID"
  58. # Init git, git annex
  59. dbg git init
  60. dbg git annex init
  61. # Set some default options :
  62. # - make `cat` the default pager makes it easier to parse outputs
  63. # - set rule for annex usage
  64. dbg git config core.pager cat
  65. dbg git config advice.detachedHead false
  66. dbg git annex config --set annex.largefiles "$GIT_OMMIX_LARGE_FILES"
  67. # Rename the default branch to the patient ID
  68. dbg git checkout -b "patient/$ID"
  69. dbg git add *
  70. }
  71. # Add sample
  72. # Uses :
  73. # - ID
  74. # - PATIENT
  75. function add_sample
  76. {
  77. # A sample always derives from the patient
  78. MESSAGE_ENTITY+="
  79. :$OBJECT:$ID prov:wasDerivedFrom :patient:$PATIENT ."
  80. if [[ "$METHOD" ]];then
  81. MESSAGE_ACTIVITY+="
  82. :method:$METHOD prov:used :patient:$PATIENT ."
  83. fi
  84. # Create a new sample branch stemming from the patient branch
  85. dbg stash git checkout -b "sample/$ID" "patient/$PATIENT" || die "Sample already exists"
  86. }
  87. # Add data
  88. # Uses :
  89. # - PATIENT
  90. # - SAMPLE
  91. # - [ID]
  92. # - [REVISION_OF data:<id> | <data_hash>]
  93. # - [INVALIDATE data:<id> | <data_hash>] * n
  94. function add_data
  95. {
  96. # Data always derives from the sample
  97. MESSAGE_ENTITY+="
  98. :$OBJECT:$ID prov:wasDerivedFrom :sample:$SAMPLE ."
  99. if [[ "$METHOD" ]];then
  100. MESSAGE_ACTIVITY+="
  101. :method:$METHOD prov:used :sample:$SAMPLE ."
  102. fi
  103. # REVISION_OF
  104. [[ "$REVISION_OF" ]] && revision_of
  105. # INVALIDATEs
  106. [[ "${#INVALIDATE[@]}" -gt 0 ]] && invalidate
  107. # Copy all the given files to the $OBJECT folder recursively
  108. # FILES is unquoted to allow * expansion
  109. dbg cp -t "$GIT_OMMIX_REPO_DIR/$PATIENT/$OBJECT" ${FILES[@]} --recursive || die "Unable to copy files to the patient directory at: $GIT_OMMIX_REPO_DIR/$PATIENT/$OBJECT
  110. Please check your permissions"
  111. # Add the $OBJECT folder to git
  112. dbg git add "$OBJECT"
  113. }
  114. # Add result
  115. # Uses :
  116. # - PATIENT
  117. # - SAMPLE
  118. # - USE data:<id> | <data_hash> * n
  119. # - [ID]
  120. # - [REVISION_OF result:<id> | <result_hash>]
  121. # - [INVALIDATE result:<id> | <result_hash>] * n
  122. function add_result
  123. {
  124. # USE
  125. use data
  126. # REVISION_OF
  127. [[ "$REVISION_OF" ]] && revision_of
  128. # INVALIDATEs
  129. [[ "${#INVALIDATE[@]}" -gt 0 ]] && invalidate
  130. # Copy all the given files to the $OBJECT folder recursively
  131. # FILES is unquoted to allow * expansion
  132. dbg cp -t "$GIT_OMMIX_REPO_DIR/$PATIENT/$OBJECT" ${FILES[@]} --recursive || die "Unable to copy files to the patient directory at: $GIT_OMMIX_REPO_DIR/$PATIENT/$OBJECT
  133. Please check your permissions"
  134. # Add the $OBJECT folder to git
  135. dbg git add "$OBJECT"
  136. }
# Add diagnosis
# Create the "diagnosis/<ID>" branch and stage (without committing) a merge
# of every object the diagnosis uses or revises. The statement order matters:
# use, revision_of and invalidate each overwrite the global HASH and NAME.
# Uses :
# - ID
# - PATIENT
# - USE result|diagnosis:<id> | <result|diagnosis_hash>
# - [INVALIDATE diagnosis:<id> | <diagnosis_hash>]
# - [REVISION_OF diagnosis:<id> | <diagnosis_hash>]
function add_diagnosis
{
# Commits handed to the final git merge
local merges=()
# USE
if [[ "${#USE[@]}" -gt 0 ]]; then
# Each used object must be a result or a diagnosis; afterwards HASH holds
# one entry per used object
use "(result|diagnosis)"
# Start merging from the first object used
dbg stash git checkout "${HASH[0]}"
merges+=("${HASH[@]}")
fi
# REVISION_OF
if [[ "$REVISION_OF" ]]; then
# revision_of resolves the revised object through validate_commit;
# "$HASH" and "$NAME" below expand to the first element of those variables
revision_of
# Start merging from the revision_of
dbg stash git checkout "$HASH"
# Remove the diagnosis branch if it existed
dbg git branch -D "diagnosis/${NAME/diagnosis:}"
# The revised diagnosis goes first in the merge list
merges=("$HASH" "${merges[@]}")
fi
# Branch off from whatever commit was checked out above
dbg git checkout -b "diagnosis/$ID"
# INVALIDATEs
if [[ "${#INVALIDATE[@]}" -gt 0 ]]; then
# invalidate leaves one NAME entry per invalidated diagnosis
invalidate
local inval
for inval in "${NAME[@]}"; do
# Delete the branch named after each invalidated diagnosis
dbg git branch -D "diagnosis/${inval/diagnosis:}"
done
fi
# Stage the merge only; no commit is created here (--no-commit)
dbg git merge "${merges[@]}" --no-commit
}
  174. ###############################################################################
  175. #
  176. # List functions
  177. # List dispatch
  178. function list
  179. {
  180. case "$OBJECT" in
  181. patient)
  182. list_patient ;;
  183. sample|data|result|diagnosis)
  184. validate_patient
  185. validate_hashes
  186. list_object ;;
  187. all)
  188. validate_patient
  189. validate_hashes
  190. list_all ;;
  191. *) usage $VERB ;;
  192. esac
  193. }
  194. # List patients
  195. # Uses : nothing
  196. function list_patient
  197. {
  198. local pat
  199. for pat in *; do
  200. dbg git -C "$pat" rev-parse && echo "$pat"
  201. done
  202. }
  203. # List objects
  204. # Uses :
  205. # - PATIENT
  206. # - [HASHES]
  207. function list_object
  208. {
  209. git log --format=%s --grep="^$OBJECT:" "${HASHES[@]}"
  210. }
  211. function list_all
  212. {
  213. git log --format=%s --grep='^\(patient\|sample\|data\|result\|diagnosis\)' "${HASHES[@]}"
  214. }
  215. ###############################################################################
  216. # Get functions
  217. # Get dispatch
  218. function get
  219. {
  220. validate_patient
  221. case "$OBJECT" in
  222. prov|last|timeline|object|graph|log|file)
  223. validate_hashes
  224. get_"$OBJECT" ;;
  225. sparql)
  226. HASHES="--all"
  227. get_sparql ;;
  228. *) usage $VERB ;;
  229. esac
  230. }
  231. # Return the provenance in turtle format
  232. # It is the concatenation of all commit messages (without the subject line)
  233. function get_prov
  234. {
  235. echo "@prefix xsd: <http://www.w3.org/2001/XMLSchema#> .
  236. @prefix foaf: <http://xmlns.com/foaf/0.1/> .
  237. @prefix prov: <http://www.w3.org/ns/prov#> .
  238. @prefix : <http://gitommix.org/gitommix#> .
  239. $(git log --format=%b ${HASHES[@]})" | \
  240. rapper -i turtle - gitommix -eqo turtle
  241. }
  242. function get_graph
  243. {
  244. QUERY=" CONSTRUCT {?s ?p ?o} WHERE
  245. {
  246. { # Entities
  247. ?s ?p ?o .
  248. ?s a prov:Entity .
  249. ?o a prov:Entity .
  250. }
  251. UNION
  252. { # Providers
  253. ?s ?p ?o .
  254. ?o a :provider .
  255. FILTER (?p = prov:wasAttributedTo)
  256. }
  257. UNION
  258. { # Methods
  259. ?s ?p ?o .
  260. ?o a :method .
  261. FILTER (?p = prov:wasGeneratedBy || ?p = prov:wasInvalidatedBy)
  262. }
  263. UNION
  264. { # Methods
  265. ?s ?p ?o .
  266. ?s a :method .
  267. FILTER (?p = prov:used)
  268. }
  269. UNION
  270. { # Providers with methods
  271. ?s ?p ?o .
  272. ?o a :provider .
  273. FILTER (?p = prov:wasAssociatedWith)
  274. }
  275. }"
  276. get_sparql | \
  277. turtle2dot | \
  278. dot -Tx11
  279. }
  280. function get_timeline
  281. {
  282. QUERY="CONSTRUCT {?s ?o ?p} WHERE
  283. {
  284. { ?s ?o ?p .
  285. ?s a :diagnosis .
  286. ?p a :diagnosis
  287. }
  288. UNION
  289. { ?s ?o ?p .
  290. ?s a :diagnosis .
  291. FILTER (?o = rdf:type && ?p = :diagnosis)
  292. }
  293. }"
  294. get_sparql | \
  295. turtle2dottime | \
  296. dot -Tx11
  297. }
  298. function get_object
  299. {
  300. git checkout "${HASHES[0]}"
  301. }
  302. # Get up-to-date result of a sample
  303. # Print the last commit
  304. function get_last
  305. {
  306. git branch --list 'diagnosis/*' --list 'sample/*' --format='%(subject)' --contains "${HASHES[@]}"
  307. }
  308. # Print git commit log
  309. function get_log
  310. {
  311. git log --oneline --graph --grep='^\(patient\|sample\|data\|result\|diagnosis\)' "${HASHES[@]}"
  312. }
  313. # Print the name of the files added by the objects
  314. function get_file
  315. {
  316. git diff-tree --no-commit-id --name-only "${HASHES[@]}" -r
  317. }
  318. function get_sparql
  319. {
  320. local prefix=$(get_prov | grep "^@prefix" | sed -E -e 's/@//' -e 's/ .$//')
  321. get_prov | \
  322. roqet -e "$prefix
  323. $QUERY" -D - -r turtle -q
  324. }
# Convert turtle read on stdin to a dot graph laid out as a diagnosis timeline
# rapper does the turtle -> dot conversion; sed then restyles the dot source.
# Two of the sed expressions are generated from the repository:
# - HEADS : nodes named after the tip commit subject of a diagnosis/* branch
# - INVALIDATED : nodes named in a "prov:invalidatedAtTime" triple
# NOTE(review): $HEADS and $INVALIDATED are expanded unquoted although they
# contain spaces; this presumably relies on the caller's IFS keeping each of
# them a single word - TODO confirm against the main script.
# NOTE(review): the loop variables branch and invalid are not declared local.
function turtle2dottime
{
local HEADS=
# Collect the subject of the tip commit of every diagnosis branch
for branch in $(git for-each-ref --format='%(refname)' 'refs/heads/diagnosis/*'); do
HEADS="$HEADS|$(git log --format=%s -1 $branch)"
done
# ${HEADS:1} drops the leading "|" left by the accumulation above
HEADS="-e s/(^\t\"R(${HEADS:1})\" \[[^]]+)/\1, color = black, style = \"bold, filled\"/"
local INVALIDATED=
# First space-separated field of every commit-body line mentioning
# invalidatedAtTime; ${invalid:1} drops its first character
for invalid in $(git log --format=%b --all | grep invalidatedAtTime | cut -f 1 -d ' ');do
INVALIDATED="$INVALIDATED|${invalid:1}"
done
INVALIDATED="-e s/(^\t\"R(${INVALIDATED:1})\" \[[^]]+)/\1, style = \"dashed, filled\"/"
# The expressions are applied in order: generic styling first, then the
# diagnosis, HEADS and INVALIDATED decorations, then edge ports and labels
rapper -i turtle - gitommix -eqo dot | \
sed -E \
-e 's/^.*rdf:type.*$//' \
-e 's/^\t"Rdiagnosis".*$//' \
-e 's/rankdir = LR;/rankdir = RL;/' \
-e 's/(charset="utf-8";)/\1\n\tnode [ shape = ellipse, color = "#808080", style = filled, fillcolor = white ];/' \
-e 's/, shape = ellipse, color = blue//' \
-e 's/label="\w+:/label="/' \
-e 's/wasDerivedFrom//' \
-e 's/(label="wasRevisionOf")/label="", weight = 2, style = dashed/' \
-e 's/(^\t"Rdiagnosis:[^"]+" \[[^]]+)/\1, fillcolor = "#FF9999", group = diagnosis/' \
$HEADS \
$INVALIDATED \
-e 's/(^\t"Rdiagnosis:[^"]+") -> ("Rdiagnosis:[^"]+")/\1:w -> \2:e/' \
-e 's/^\tlabel=.*//'
}
# Convert turtle read on stdin to a dot graph of the whole provenance
# rapper does the turtle -> dot conversion; sed then restyles the dot source.
# Three sets of sed expressions are generated from the repository:
# - HEADS : nodes named after the tip commit subject of a patient/*,
#   sample/* or diagnosis/* branch
# - INVALIDATED : nodes named in a "prov:invalidatedAtTime" triple
# - GRPS : one expression per sample branch, putting the commits of that
#   branch in a dot group named after the sample
# NOTE(review): $HEADS, $INVALIDATED and ${GRPS[@]} are expanded unquoted
# although they contain spaces; this presumably relies on the caller's IFS
# keeping each expression a single word - TODO confirm against the main script.
# NOTE(review): the loop variables branch, invalid, group and commit are not
# declared local.
function turtle2dot
{
local HEADS=
# Collect the subject of the tip commit of every patient, sample and
# diagnosis branch
for branch in $(git for-each-ref --format='%(refname)' 'refs/heads/patient/*' 'refs/heads/sample/*' 'refs/heads/diagnosis/*'); do
HEADS="$HEADS|$(git log --format=%s -1 $branch)"
done
# ${HEADS:1} drops the leading "|" left by the accumulation above
HEADS="-e s/(^\t\"R(${HEADS:1})\" \[[^]]+)/\1, color = black, style = \"bold, filled\"/"
local INVALIDATED=
# First space-separated field of every commit-body line mentioning
# invalidatedAtTime; ${invalid:1} drops its first character
for invalid in $(git log --format=%b --all | grep invalidatedAtTime | cut -f 1 -d ' ');do
INVALIDATED="$INVALIDATED|${invalid:1}"
done
INVALIDATED="-e s/(^\t\"R(${INVALIDATED:1})\" \[[^]]+)/\1, style = \"dashed, filled\"/"
local GRPS=()
for group in $(git for-each-ref --format='%(refname:short)' 'refs/heads/sample/*'); do
local COMMITS=
# Subjects of the commits reachable from the sample branch, leaving out
# commits matching ^patient
for commit in $(git log --format=%s $group --invert-grep --grep '^patient'); do
COMMITS="$COMMITS|$commit"
done
# ${group/sample\/} is the branch name without its "sample/" prefix
GRPS+=("-e s/(^\t\"R(${COMMITS:1})\" \[[^]]+)/\1, group = \"${group/sample\/}\"/")
done
# The expressions are applied in order: generic styling, per-type fill
# colours, the generated GRPS / HEADS / INVALIDATED decorations, then
# node shapes, edge weights and edge ports
rapper -i turtle - gitommix -eqo dot | \
sed -E \
-e 's/rankdir = LR;/rankdir = BT;/' \
-e 's/(charset="utf-8";)/\1\n\tnode [ shape = ellipse, color = "#808080", style = filled, fillcolor = white ];/' \
-e 's/, shape = ellipse, color = blue//' \
-e 's/label="\w+:/label="/' \
-e 's/wasDerivedFrom//' \
-e 's/(label="wasRevisionOf")/label="", weight = 2, style = dashed/' \
-e 's/(label="(wasGeneratedBy|used|wasAttributedTo|wasAssociatedWith)")/label="", weight = 0, color=gray/' \
-e 's/(^\t"Rdata:[^"]+" \[[^]]+)/\1, fillcolor = "#9999FF"/' \
-e 's/(^\t"Rresult:[^"]+" \[[^]]+)/\1, fillcolor = "#99FF99"/' \
-e 's/(^\t"Rdiagnosis:[^"]+" \[[^]]+)/\1, fillcolor = "#FF9999", group = diagnosis/' \
-e 's/(label="wasInvalidatedBy")/label="", weight = 0, style = dashed, color=gray/' \
${GRPS[@]} \
$HEADS \
$INVALIDATED \
-e 's/(^\t"Rmethod:[^"]+" \[[^]]+)/\1, shape = rectangle, color = blue, fillcolor = "#9FB1FC"/' \
-e 's/(^\t"Rprovider:[^"]+" \[[^]]+)/\1, shape = house, color = black, fillcolor = "#FED37F"/' \
-e 's/(^\t"Rdata:[^"]+" -> "Rsample:[^]]+)/\1, weight = 5/' \
-e 's/(^\t"Rsample:[^"]+" -> "Rpatient:[^]]+)/\1, weight = 5/' \
-e 's/(^\t"Rdiagnosis:[^"]+" -> "Rresult:[^]]+)/\1, minlen = 3/' \
-e 's/(^\t"Rsample:[^"]+") -> ("Rpatient:[^"]+")/\1:n -> \2/' \
-e 's/(^\t"Rdata:[^"]+") -> ("Rsample:[^"]+")/\1:n -> \2:s/' \
-e 's/(^\t"Rresult:[^"]+") -> ("Rdata:[^"]+")/\1:n -> \2:s/' \
-e 's/(^\t"Rdiagnosis:[^"]+") -> ("Rdiagnosis:[^"]+")/\1:n -> \2:s/' \
-e 's/(^\t"Rdiagnosis:[^"]+") -> ("Rresult:[^"]+")/\1 -> \2:s/' \
-e 's/^\tlabel=.*//'
}
  401. ###############################################################################
  402. #
  403. # Validation helpers
  404. # Validate PATIENT and switch to it
  405. function validate_patient
  406. {
  407. [[ "$PATIENT" ]] || die "Please provide a --patient"
  408. dbg git -C "$PATIENT" rev-parse || die "patient:$PATIENT is not a patient"
  409. cd "$PATIENT" || die "Unable to acces the patient directory at: $GIT_OMMIX_REPO_DIR/$PATIENT
  410. Please check your permissions"
  411. }
  412. # Validate SAMPLE and switch to it
  413. function validate_sample
  414. {
  415. [[ "$SAMPLE" ]] || die "Please provide a --sample"
  416. dbg git rev-parse --verify "sample/$SAMPLE" || die "sample:$SAMPLE is not a sample of patient:$PATIENT"
  417. dbg stash git checkout "sample/$SAMPLE" || die "Something wrong happened, unable to checkout sample/$SAMPLE of patient:$PATIENT"
  418. }
  419. # Validate FILES and qualify path
  420. function validate_files
  421. {
  422. [[ "${#FILES[@]}" -gt 0 ]] || die "Please provide files to add"
  423. # Qualify FILES with their full path
  424. for ((i = 0; i < ${#FILES[@]}; i++));do
  425. FILES[i]="$CURRENT_DIR/${FILES[i]}"
  426. done
  427. }
# Validate a commit given by its hash or id. It must match a type.
# Nothing is printed: the result is left in global variables.
# - HASH : abbreviated hash when $2 was resolved by name, otherwise $2 as given
# - NAME : subject line of the commit found
# - FULLNAME : $2 as given
# Dies when $2 designates no commit, several commits, or a commit whose
# subject does not start with "<type>:".
# $1 : type, interpolated into extended regular expressions
#      (add_diagnosis passes "(result|diagnosis)")
# $2 : hash|id
function validate_commit
{
# Remove prefix from names given in full
HASH="$2"
FULLNAME="$2"
NAME="$(echo "$2" | sed -E "s/^$1://")"
# Count, over all refs, the commit subjects that end with $2 among the
# commits whose message has a line matching "<type>:<name>"
local ncommits=$(git log --format=%s --all -E --grep "^$1:$NAME$" | grep -E "$FULLNAME$" -c)
case "$ncommits" in
"1")
# Exactly one match: $2 was a name; resolve it to its abbreviated hash and
# reload NAME from the commit subject
HASH=($(git log --format=%h --all -E --grep "^$1:$NAME$" --grep "$FULLNAME$" --all-match))
NAME="$(git log --format=%s --max-count=1 "$HASH")"
;;
"0")
# No match by name: try $2 as a revision
if git log "$HASH" &> /dev/null;then # find commit by its hash
NAME="$(git log --format=%s --max-count=1 "$HASH")"
# check that the commit matches the given type in $1
echo "$NAME" | grep -E "^$1:" --quiet || die "$HASH is not the commit hash of existing $1 of patient:$PATIENT"
else
die "$2 is neither the commit hash nor the name of existing $1 of patient:$PATIENT"
fi
;;
*)
# Several commits carry that name: refuse to guess
die "$2 matches multiple commits of type $1 of patient:$PATIENT.
Please be more specific"
;;
esac
}
  459. # Validate hashes or resolve and expand names given after a list verb
  460. function validate_hashes
  461. {
  462. local hash
  463. local out=()
  464. if [[ "${#HASHES[@]}" -gt 0 && "$HASHES" ]];then
  465. for hash in "${HASHES[@]}";do
  466. if git log --format=%s --all | grep -E "$hash$" --quiet;then
  467. out+=("$(git log --format=%h --all --grep "$hash$")")
  468. elif git log "$hash" &> /dev/null;then
  469. out+=("$(git log --format=%h --max-count=1 "$hash")")
  470. else
  471. die "$hash is neither the commit hash nor the name of an existing object of patient:$PATIENT"
  472. fi
  473. done
  474. HASHES=("${out[@]}")
  475. else
  476. HASHES="--all"
  477. fi
  478. }
  479. ###############################################################################
  480. # PROV Helpers
  481. # Prepare the base components of the PROV message
  482. function base_prov
  483. {
  484. MESSAGE_ACTIVITY=
  485. MESSAGE_AGENT=
  486. MESSAGE_ENTITY="
  487. :$OBJECT:$ID prov:generatedAtTime xsd:dateTime:$DATE .
  488. :$OBJECT:$ID a prov:Entity .
  489. :$OBJECT:$ID a :$OBJECT .
  490. :$OBJECT:$ID foaf:name \"$ID\" ."
  491. if [[ "$PROVIDER" ]];then
  492. MESSAGE_AGENT+="
  493. :provider:$PROVIDER a prov:Agent .
  494. :provider:$PROVIDER a :provider ."
  495. MESSAGE_ENTITY+="
  496. :$OBJECT:$ID prov:wasAttributedTo :provider:$PROVIDER ."
  497. fi
  498. # Add a method PROV triplets if given
  499. if [[ "$METHOD" ]];then
  500. MESSAGE_ACTIVITY="
  501. :method:$METHOD a prov:Activity .
  502. :method:$METHOD a :method .
  503. :method:$METHOD prov:startedAtTime xsd:dateTime:$DATE ."
  504. if [[ "$PROVIDER" ]];then
  505. MESSAGE_ACTIVITY+="
  506. :method:$METHOD prov:wasAssociatedWith :provider:$PROVIDER ."
  507. fi
  508. MESSAGE_ENTITY+="
  509. :$OBJECT:$ID prov:wasGeneratedBy :method:$METHOD ."
  510. fi
  511. }
  512. # Validate and add relevant triplets for wasRevisionOf
  513. # An $OBJECT can only be a revision_of another $OBJECT
  514. function revision_of
  515. {
  516. validate_commit "$OBJECT" "$REVISION_OF"
  517. MESSAGE_ENTITY+="
  518. :$OBJECT:$ID prov:wasRevisionOf :$NAME ."
  519. }
  520. # Validate and add relevant triplets for wasInvalidated
  521. function invalidate
  522. {
  523. local inval
  524. local out_hash=()
  525. local out_name=()
  526. for inval in "${INVALIDATE[@]}";do
  527. validate_commit "$OBJECT" "$inval"
  528. out_hash+=("$HASH")
  529. out_name+=("$NAME")
  530. MESSAGE_ENTITY+="
  531. :$NAME prov:invalidatedAtTime xsd:dateTime:$DATE ."
  532. if [[ "$METHOD" ]];then
  533. MESSAGE_ENTITY+="
  534. :$NAME prov:wasInvalidatedBy :method:$METHOD ."
  535. fi
  536. done
  537. HASH=("${out_hash[@]}")
  538. NAME=("${out_name[@]}")
  539. }
  540. # Validate and add relevant triplets for wasDerivedFrom
  541. # $1 : type
  542. function use
  543. {
  544. local use
  545. local out_hash=()
  546. local out_name=()
  547. for use in "${USE[@]}";do
  548. validate_commit "$1" "$use"
  549. out_hash+=("$HASH")
  550. out_name+=("$NAME")
  551. MESSAGE_ENTITY+="
  552. :$OBJECT:$ID prov:wasDerivedFrom :$NAME ."
  553. if [[ "$METHOD" ]];then
  554. MESSAGE_ACTIVITY+="
  555. :method:$METHOD prov:used :$NAME ."
  556. fi
  557. done
  558. HASH=("${out_hash[@]}")
  559. NAME=("${out_name[@]}")
  560. }
  561. # Do the actual git commit with all the parameters set
  562. function commit
  563. {
  564. dbg git commit --allow-empty \
  565. --author "$AUTHOR" \
  566. --date "$DATE" \
  567. --message "$OBJECT:$ID
  568. $MESSAGE_ENTITY
  569. $MESSAGE_AGENT
  570. $MESSAGE_ACTIVITY
  571. $MESSAGE2"
  572. dbg git annex restage
  573. }
  574. ###############################################################################
  575. #
  576. # Utility functions
  577. # Print messages passed as arguments and exit in error
  578. function die
  579. {
  580. echo -e "\033[0;31m$@\033[0m"
  581. exit 2
  582. }
  583. # Redirect commands to &>/dev/null if not DEBUGging
  584. function dbg
  585. {
  586. if [[ "$DEBUG" == "debug" ]]; then
  587. echo -ne "\033[0;30m"
  588. "$@"
  589. echo -ne "\033[0m"
  590. elif [[ "$DEBUG" == "dry" ]];then
  591. echo -e "\033[0;34m$@\033[0m"
  592. elif [[ "$DEBUG" == "verbose" ]];then
  593. echo -e "\033[0;34m$@\033[0m"
  594. echo -ne "\033[0;30m"
  595. "$@"
  596. echo -ne "\033[0m"
  597. else
  598. "$@" &>/dev/null
  599. fi
  600. }
  601. # Stash in place modifications
  602. function stash
  603. {
  604. git stash push
  605. "$@"
  606. git stash pop
  607. }
  608. ###############################################################################
  609. #
  610. # Usage
  611. function usage
  612. {
  613. case "$1" in
  614. root)
  615. echo "git ommix {verb} {object} [--options] [files]
  616. Verbs:
  617. - add
  618. - list
  619. - get
  620. Type \"git ommix {verb}\" to get help on {verb}
  621. Debugging options:
  622. -d|--debug : print the raw command output
  623. --dry : print instead of running any write command
  624. --verbose : print and run write commands
  625. Git ommix can be configured system-wide with /etc/gitommix.conf,
  626. per user with \$XDG_CONFIG/.gitommix
  627. or with environment variables:
  628. - GIT_OMMIX_REPO_DIR : place to find patient repos (default: ~/GitOmmix/)
  629. - GIT_OMMIX_LARGE_FILES : git ommix rules for large files (default: largerthan=100Mb and (include=data/* or include=results/*))
  630. - GIT_OMMIX_DEFAULT_AUTHOR : set a default commit author (default: gitommix <gitommix>)" ;;
  631. add)
  632. echo "git ommix add <object> [--options] [--message] [FILES]
  633. Add a new instance of an object.
  634. Various options can be associated with an object (id, provider, method, etc.).
  635. Some options are mandatory depending on the added object.
  636. Anything other than a new patient has to be associated to a patient.
  637. Data and results are associated to samples.
  638. Results use data.
  639. Diagnoses use results and other diagnoses.
  640. Data and results add [FILES] to the repo into the respective directory.
  641. If [FILES] is not specified, all the files in the current directory are added to the data/result.
  642. Objects:
  643. - patient
  644. - sample -p <patient>
  645. - data -p <patient> -s <sample>
  646. - result -p <patient> -s <sample> --use <data>
  647. - diagnosis -p <patient> --use <result|diagnosis>
  648. Options:
  649. --id (-i) (default: randomly generated string)
  650. --patient (-p)
  651. --sample (-s)
  652. --method
  653. --date (default: current date)
  654. --provider
  655. --use
  656. --revision_of
  657. --invalidate
  658. Data, results, and diagnoses can be a revision_of and/or invalidate another object of the same type.
  659. Additional PROV triples further qualifying the objects can be added in the turtle format using --message" ;;
  660. list)
  661. echo "git ommix list {object} -p <patient> [<object>...]
  662. List all the objects of the given type in the given patient, optionally constrained to the history of one or multiple objects.
  663. Objects:
  664. - patient
  665. - sample
  666. - data
  667. - result
  668. - diagnosis
  669. Optional reference objects can be specified as commit hashes, the full name of the object, or only the name part of the object, matching all the objects with the same name." ;;
  670. get)
  671. echo "git ommix get {option} -p <patient> [<object>...]
  672. Run queries on the patient's git ommix store, optionally constrained to the history of one or multiple objects.
  673. - prov: get the PROV in turtle format
  674. - graph: get a graphical representation of the PROV
  675. - last: get the last version of an object
  676. - timeline: get a timeline of diagnoses
  677. - object: checkout the patient repo at the time of the addition of the object
  678. - log: get the git log of the repo
  679. - file: get the list of files added by an object
  680. - sparql: execute an arbitrary SPARQL query" ;;
  681. esac
  682. exit 0
  683. }