Project 'android/puissance4' was moved to 'android/org.benoitharrault.puissance4'. Please update any links and bookmarks that may still have the old path.
Select Git revision
pubspec.lock
01_download_images.sh 3.19 KiB
#!/usr/bin/env bash
#
# Downloads image thumbnails found through DuckDuckGo image search for a
# random sample of the words listed in words.csv (first ';'-separated column)
# and stores them, converted to PNG, under cache/download/<word>/.
#
# Tunables: MAX_RANDOM_WORDS_TO_DOWNLOAD, MAX_IMAGES_PER_WORD, KEYWORD_VARIANTS.

set -o errexit
set -o nounset
set -o pipefail

# Abort early, with a clear message on stderr, when an external tool is missing.
#   $1 - command name, $2 - short human-readable description
require_command() {
  local -r COMMAND_NAME="$1"
  local -r COMMAND_DESCRIPTION="$2"
  command -v "${COMMAND_NAME}" >/dev/null 2>&1 || {
    echo >&2 "I require ${COMMAND_NAME} (${COMMAND_DESCRIPTION}) but it's not installed. Aborting."
    exit 1
  }
}

require_command convert "imagemagick"
# NOTE(review): jq is not referenced by the visible part of this script; the
# check is kept so the script's requirements do not change.
require_command jq "json parser"
# curl, wget and md5sum are used further down; without this check a missing
# curl would only show up as "No image found..." for every single query.
require_command curl "http client"
require_command wget "file downloader"
require_command md5sum "coreutils"

# Directory containing this script, and its parent directory.
CURRENT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" >/dev/null 2>&1 && pwd)"
BASE_DIR="$(dirname "${CURRENT_DIR}")"

# CSV source file for words
SOURCE_CSV_FILE="${CURRENT_DIR}/words.csv"

# Upper bounds: number of randomly picked words, and images kept per search.
MAX_RANDOM_WORDS_TO_DOWNLOAD=100
MAX_IMAGES_PER_WORD=5

# Images variants: suffixes appended to each word to build search queries.
# The leading comma yields an empty first variant, i.e. the bare word itself.
KEYWORD_VARIANTS=",image,picture,drawing,black and white,painting,icon"

# Downloaded images are cached here, one sub-folder per word.
IMAGES_CACHE_FOLDER="${CURRENT_DIR}/cache/download"
mkdir -p "${IMAGES_CACHE_FOLDER}"

# One variant per line, ready to be consumed by `while read`.
KEYWORD_VARIANTS_LIST="$(echo "${KEYWORD_VARIANTS}" | tr "," "\n")"
#######################################
# Run a DuckDuckGo image search and print the thumbnail URLs it returns.
# Globals:
#   MAX_IMAGES_PER_WORD (read) - maximum number of URLs printed
#                                (falls back to 5 when unset)
# Arguments:
#   $1 - free-text search query; may contain spaces or non-ASCII characters
# Outputs:
#   Thumbnail URLs on stdout, one per line; nothing when no result is found
# Returns:
#   0 always - network or parsing failures simply yield empty output, so a
#   caller running under `set -o errexit` is not aborted by an empty search.
#######################################
get_images_from_ddg() {
  local -r QUERY_STRING="$1"
  local -r BASE_URL="https://duckduckgo.com"
  local SEARCH_PAGE=""
  local JSON_PATH=""
  local JSON_RESULTS=""

  # Fetch the HTML search page. curl builds the query string itself
  # (--get + --data*), so the search terms are properly URL-encoded.
  SEARCH_PAGE="$(
    curl --silent --get \
      --data "t=ffab" \
      --data-urlencode "q=${QUERY_STRING}" \
      --data "atb=v301-1" \
      --data "iax=images" \
      --data "ia=images" \
      "${BASE_URL}/"
  )" || true

  # The page embeds a call shaped like ...;<x>.initialize('/d.js?...',...);
  # take the quoted path and switch it to the images endpoint (/i.js).
  # "No match" is an expected outcome, hence the explicit `|| true`.
  JSON_PATH="$(
    printf '%s\n' "${SEARCH_PAGE}" \
      | tr ' ' '\n' \
      | grep "initialize" \
      | cut -d';' -f2 \
      | cut -d "'" -f2 \
      | sed 's/\/d\.js/\/i.js/g'
  )" || true

  # Nothing to query: avoid a pointless request against the bare site root.
  if [[ -z "${JSON_PATH}" ]]; then
    return 0
  fi

  JSON_RESULTS="$(curl --silent "${BASE_URL}${JSON_PATH}")" || true

  # Split the payload on spaces and commas, keep the "thumbnail":"<url>"
  # members and print the URL (4th '"'-delimited field) of the first few.
  # `|| true`: grep finding nothing, or head closing the pipe early, is fine.
  printf '%s\n' "${JSON_RESULTS}" \
    | tr ' ,' '\n\n' \
    | grep '"thumbnail":' \
    | cut -d '"' -f4 \
    | head -n "${MAX_IMAGES_PER_WORD:-5}" \
    || true
}
# shuf is needed to draw the random sample of words below.
command -v shuf >/dev/null 2>&1 || { echo >&2 "I require shuf (coreutils) but it's not installed. Aborting."; exit 1; }

# Random sample of distinct words (first ';'-separated column of the CSV).
# `shuf -n` replaces `sort -R | head`: when head closes the pipe early, sort
# may die from SIGPIPE, which pipefail + errexit turn into a silent abort.
WORDS_LIST="$(cut -d';' -f1 "${SOURCE_CSV_FILE}" | sort -u | shuf -n "${MAX_RANDOM_WORDS_TO_DOWNLOAD}")"

# For every word, try every query variant and cache the thumbnails found as
# <cache>/<word>/<md5 of url>.png. Already cached images are not fetched again.
while read -r KEYWORD; do
  # Skip blank lines.
  [[ -n "${KEYWORD}" ]] || continue
  echo "KEYWORD: ${KEYWORD}"

  while read -r VARIANT; do
    echo " VARIANT: ${VARIANT}"

    # An empty variant means "search for the bare keyword".
    QUERY_STRING="${KEYWORD}"
    if [[ -n "${VARIANT}" ]]; then
      QUERY_STRING="${QUERY_STRING} ${VARIANT}"
    fi

    URL_LIST="$(get_images_from_ddg "${QUERY_STRING}")"
    if [[ -z "${URL_LIST}" ]]; then
      echo " No image found..."
    fi

    while read -r URL; do
      [[ -n "${URL}" ]] || continue

      # The hash covers the URL plus a trailing newline; keep it that way so
      # file names of previously cached images stay valid.
      HASH="$(echo "${URL}" | md5sum | awk '{print $1}')"
      OUTPUT_FILE="${IMAGES_CACHE_FOLDER}/${KEYWORD}/${HASH}.png"
      # Log the path relative to the cache folder (literal prefix removal).
      echo " OUTPUT_FILE: ${OUTPUT_FILE#"${IMAGES_CACHE_FOLDER}/"}"

      # Drop any temporary file left behind by an interrupted run.
      TMP_IMAGE_FILE="${OUTPUT_FILE}.tmp.png"
      rm -f -- "${TMP_IMAGE_FILE}"

      if [[ -f "${OUTPUT_FILE}" ]]; then
        echo " - Already downloaded"
        continue
      fi

      # Quoted on purpose: keywords may contain spaces.
      mkdir -p -- "$(dirname -- "${OUTPUT_FILE}")"

      echo " + Downloading..."
      # A failed download or conversion must not abort the whole batch under
      # errexit: report it on stderr and move on to the next URL.
      if wget --quiet --timeout=10 "${URL}" -O "${TMP_IMAGE_FILE}" && [[ -s "${TMP_IMAGE_FILE}" ]]; then
        echo " + Converting..."
        if ! convert "${TMP_IMAGE_FILE}" "${OUTPUT_FILE}"; then
          echo >&2 " ! Conversion failed, skipping: ${URL}"
          # Never leave a partial file that would later look "already downloaded".
          rm -f -- "${OUTPUT_FILE}"
        fi
      else
        echo >&2 " ! Download failed, skipping: ${URL}"
      fi
      rm -f -- "${TMP_IMAGE_FILE}"
    done <<<"${URL_LIST}"
  done <<<"${KEYWORD_VARIANTS_LIST}"
done <<<"${WORDS_LIST}"