Difference between revisions of "User:Andreas Plank/Useful Scripts (Linux)"
From CETAF Identifiers Wiki
(Created page with "== Get IDs of api.ror.org by Query == <syntaxhighlight lang="bash" style="font-size:smaller;"> # ############################################## # dependency: curl # dependenc...") |
m (→Get IDs of api.ror.org by Query: description) |
||
(12 intermediate revisions by the same user not shown) | |||
Line 3: | Line 3: | ||
<syntaxhighlight lang="bash" style="font-size:smaller;"> | <syntaxhighlight lang="bash" style="font-size:smaller;"> | ||
# ############################################## | # ############################################## | ||
+ | # description: get ID and some additional information of api.ror.org via a query match (exact match or match by words) | ||
# dependency: curl | # dependency: curl | ||
# dependency: jq (JSON tool) | # dependency: jq (JSON tool) | ||
# dependency: awk | # dependency: awk | ||
− | # | + | |
+ | # #### returned JSON | ||
+ | # { | ||
+ | # "number_of_results": 1, | ||
+ | # "time_taken": 41, | ||
+ | # "items": [ | ||
+ | # { | ||
+ | # "id": "https://ror.org/00bv4cx53", | ||
+ | # "name": "Botanischer Garten und Botanisches Museum Berlin", | ||
+ | # "types": [ | ||
+ | # "Archive" | ||
+ | # … | ||
+ | # } | ||
+ | |||
+ | # #### one entry by exact query phrase | ||
curl -G https://api.ror.org/organizations --data-urlencode 'query="Botanischer Garten und Botanisches Museum Berlin"' | jq . > bgbm.org.json | curl -G https://api.ror.org/organizations --data-urlencode 'query="Botanischer Garten und Botanisches Museum Berlin"' | jq . > bgbm.org.json | ||
jq '.items|=sort_by(.name)|.items [] | (.id + "::" + .name)' bgbm.org.json | jq '.items|=sort_by(.name)|.items [] | (.id + "::" + .name)' bgbm.org.json | ||
# "https://ror.org/00bv4cx53::Botanischer Garten und Botanisches Museum Berlin" | # "https://ror.org/00bv4cx53::Botanischer Garten und Botanisches Museum Berlin" | ||
− | # get from multiple result pages | + | # #### get from multiple result pages |
− | query_string="Botanisch Garten garden botanical" | + | query_string="Botanisch Garten Gärten garden botanical gardens natural naturalis museum" |
number_of_results=`curl --silent --get https://api.ror.org/organizations --data-urlencode "query=$query_string" | jq ".number_of_results"` | number_of_results=`curl --silent --get https://api.ror.org/organizations --data-urlencode "query=$query_string" | jq ".number_of_results"` | ||
number_of_pages=`echo "scale=0; ${number_of_results}/20 + 0.5" | bc -l | awk '{printf("%d\n",$1 + 0.5)}'` | number_of_pages=`echo "scale=0; ${number_of_results}/20 + 0.5" | bc -l | awk '{printf("%d\n",$1 + 0.5)}'` | ||
if [[ "$number_of_pages" =~ ^[0-9]+$ ]]; then | if [[ "$number_of_pages" =~ ^[0-9]+$ ]]; then | ||
for ((p=1; p<=$number_of_pages; p++)); do | for ((p=1; p<=$number_of_pages; p++)); do | ||
− | echo "# page $p => ror.org_page-$p.json" | + | echo "# get content of page $p => ror.org_page-$p.json" |
contents=$(curl --silent --get https://api.ror.org/organizations --data-urlencode "query=$query_string" --data-urlencode "page=$p" ) | contents=$(curl --silent --get https://api.ror.org/organizations --data-urlencode "query=$query_string" --data-urlencode "page=$p" ) | ||
echo "$contents" > "ror.org_page-$p.json" | echo "$contents" > "ror.org_page-$p.json" | ||
done | done | ||
jq --slurp '{ items: map(.items[]) }' ror.org_page-*.json > ror.org_page-all.json | jq --slurp '{ items: map(.items[]) }' ror.org_page-*.json > ror.org_page-all.json | ||
− | jq '.items|=sort_by(.name)|.items [] | (.id + "::" + .name)' ror.org_page-all.json | + | # parse multiple files and concatenate the { items : [] } |
+ | |||
+ | # jq '.items|=sort_by(.name)|.items [] | (.id + " :: " + .name + " :: " + .links[0])' ror.org_page-all.json | ||
+ | jq '.items|=sort_by(.name)|.items [] | (.id + " :: " + .name + " :: " + .links[0] + " :: acronym: " + if .acronyms[0] then .acronyms[0] else "NA" end )' ror.org_page-all.json | ||
+ | # find in { items : [ {id : … , name : … , links : [ ] }] } and concatenate them via "::" | ||
else | else | ||
echo "Something wrong: \$number_of_pages is not an integer ()" | echo "Something wrong: \$number_of_pages is not an integer ()" | ||
fi | fi | ||
− | # "https://ror.org/ | + | |
− | # "https://ror.org/ | + | # remove all individually downloaded files ror.org_page-1.json … ror.org_page-123.json etc. |
+ | # rm ror.org_page-[1-9]*.json | ||
+ | |||
+ | # grab single search string via grep | ||
+ | jq '.items|=sort_by(.name)|.items [] | (.id + " :: " + .name + " :: " + .links[0] + " :: acronym: " + if .acronyms[0] then .acronyms[0] else "NA" end )' ror.org_page-all.json | \ | ||
+ | grep --ignore-case mnhn | ||
+ | |||
+ | # #### result list | ||
+ | # "https://ror.org/02ya7hp91 :: ALZHIR Museum and Memorial Complex :: https://museum-alzhir.kz/en :: acronym: NA" | ||
+ | # "https://ror.org/02pvtxb60 :: ASTRA National Museum Complex :: http://www.muzeulastra.ro/ :: acronym: NA" | ||
# … | # … | ||
− | # "https://ror.org/00bv4cx53::Botanischer Garten und Botanisches Museum Berlin" | + | # "https://ror.org/00bv4cx53 :: Botanischer Garten und Botanisches Museum Berlin :: https://www.bgbm.org/ :: acronym: BGBM" |
+ | # … | ||
+ | # "https://ror.org/01h1jbk91 :: Meise Botanic Garden :: https://www.plantentuinmeise.be/en/ :: acronym: NA" | ||
+ | # … | ||
+ | # "https://ror.org/03wkt5x30 :: National Museum of Natural History :: http://www.mnhn.fr/fr :: acronym: MNHN" | ||
+ | # … | ||
+ | # "https://ror.org/0566bfb96 :: Naturalis Biodiversity Center :: http://www.naturalis.nl/ :: acronym: NA" | ||
# … | # … | ||
− | # "https://ror.org/ | + | # "https://ror.org/0349vqz63 :: Royal Botanic Garden Edinburgh :: http://www.rbge.org.uk/ :: acronym: RBGE" |
+ | # "https://ror.org/00ynnr806 :: Royal Botanic Gardens :: http://www.kew.org/ :: acronym: NA" | ||
# … | # … | ||
− | # "https://ror.org/ | + | # "https://ror.org/03pjnvf85 :: Zürich Zoological Garden :: https://www.zoo.ch/ :: acronym: NA" |
# … | # … | ||
− | |||
# ############################################## | # ############################################## | ||
</syntaxhighlight> | </syntaxhighlight> |
Latest revision as of 15:51, 8 July 2020
Get IDs of api.ror.org by Query
# ##############################################
# description: get the ID and some additional information of organizations from api.ror.org via a query (exact phrase match or match by words)
# dependency: curl
# dependency: jq (JSON tool)
# dependency: awk
# #### returned JSON
# {
# "number_of_results": 1,
# "time_taken": 41,
# "items": [
# {
# "id": "https://ror.org/00bv4cx53",
# "name": "Botanischer Garten und Botanisches Museum Berlin",
# "types": [
# "Archive"
# …
# }
# #### one entry by exact query phrase
# The query value carries its own double quotes, i.e. it is sent as one quoted
# phrase. The pretty-printed response is stored in bgbm.org.json.
curl --get https://api.ror.org/organizations \
  --data-urlencode 'query="Botanischer Garten und Botanisches Museum Berlin"' \
  | jq . > bgbm.org.json
# List every returned item as "id::name", ordered by name.
jq '.items|=sort_by(.name)|.items [] | (.id + "::" + .name)' bgbm.org.json
# "https://ror.org/00bv4cx53::Botanischer Garten und Botanisches Museum Berlin"
# #### get from multiple result pages
# Downloads every result page of a word-based query, merges the pages into
# ror.org_page-all.json and prints one line per organization, sorted by name.
query_string="Botanisch Garten Gärten garden botanical gardens natural naturalis museum"
# Page size used for the page calculation (results are divided by 20).
results_per_page=20

number_of_results=$(
  curl --silent --get https://api.ror.org/organizations \
    --data-urlencode "query=$query_string" \
    | jq ".number_of_results"
)

# Check the raw count before doing arithmetic with it: a failed request or an
# error response leaves it empty or "null", which must not be treated as 0.
if [[ "$number_of_results" =~ ^[0-9]+$ ]]; then
  # Ceiling division with shell integers (no bc/awk needed for this step); an
  # exact multiple of the page size does not request an additional page.
  number_of_pages=$(( (number_of_results + results_per_page - 1) / results_per_page ))

  # Remember exactly which files this run wrote, so that leftover
  # ror.org_page-*.json files of an earlier run are never merged in.
  page_files=()
  for ((p = 1; p <= number_of_pages; p++)); do
    page_file="ror.org_page-$p.json"
    echo "# get content of page $p => $page_file"
    if contents=$(
      curl --silent --get https://api.ror.org/organizations \
        --data-urlencode "query=$query_string" \
        --data-urlencode "page=$p"
    ); then
      printf '%s\n' "$contents" > "$page_file"
      page_files+=("$page_file")
    else
      echo "# download of page $p failed, page skipped" >&2
    fi
  done

  # parse multiple files and concatenate the { items : [] }
  if (( ${#page_files[@]} > 0 )); then
    jq --slurp '{ items: map(.items[]) }' "${page_files[@]}" > ror.org_page-all.json
  else
    # Without file arguments jq would wait on stdin; write an empty result list.
    jq --null-input '{ items: [] }' > ror.org_page-all.json
  fi

  # find in { items : [ {id : … , name : … , links : [ ] }] } and concatenate them via "::"
  # shorter variant without the acronym column:
  # jq '.items|=sort_by(.name)|.items [] | (.id + " :: " + .name + " :: " + .links[0])' ror.org_page-all.json
  jq '.items|=sort_by(.name)|.items [] | (.id + " :: " + .name + " :: " + .links[0] + " :: acronym: " + if .acronyms[0] then .acronyms[0] else "NA" end )' ror.org_page-all.json
else
  # Show the offending value so a failed request is easy to diagnose.
  echo "Something wrong: \$number_of_results is not an integer ($number_of_results)" >&2
fi
# Optional cleanup: delete the individually downloaded files
# (ror.org_page-1.json … ror.org_page-123.json etc.). The merged
# ror.org_page-all.json does not match this pattern and is kept.
# rm ror.org_page-[1-9]*.json

# Narrow the formatted list down to the lines containing one search string
# (matched case-insensitively), here "mnhn".
jq '.items|=sort_by(.name)|.items [] | (.id + " :: " + .name + " :: " + .links[0] + " :: acronym: " + if .acronyms[0] then .acronyms[0] else "NA" end )' ror.org_page-all.json \
  | grep -i mnhn
# #### result list
# "https://ror.org/02ya7hp91 :: ALZHIR Museum and Memorial Complex :: https://museum-alzhir.kz/en :: acronym: NA"
# "https://ror.org/02pvtxb60 :: ASTRA National Museum Complex :: http://www.muzeulastra.ro/ :: acronym: NA"
# …
# "https://ror.org/00bv4cx53 :: Botanischer Garten und Botanisches Museum Berlin :: https://www.bgbm.org/ :: acronym: BGBM"
# …
# "https://ror.org/01h1jbk91 :: Meise Botanic Garden :: https://www.plantentuinmeise.be/en/ :: acronym: NA"
# …
# "https://ror.org/03wkt5x30 :: National Museum of Natural History :: http://www.mnhn.fr/fr :: acronym: MNHN"
# …
# "https://ror.org/0566bfb96 :: Naturalis Biodiversity Center :: http://www.naturalis.nl/ :: acronym: NA"
# …
# "https://ror.org/0349vqz63 :: Royal Botanic Garden Edinburgh :: http://www.rbge.org.uk/ :: acronym: RBGE"
# "https://ror.org/00ynnr806 :: Royal Botanic Gardens :: http://www.kew.org/ :: acronym: NA"
# …
# "https://ror.org/03pjnvf85 :: Zürich Zoological Garden :: https://www.zoo.ch/ :: acronym: NA"
# …
# ##############################################