Skip to content
Snippets Groups Projects
Select Git revision
  • b6e40e09b8bcaf707bec595ac7fd2dda157ce884
  • master default protected
  • 77-improve-app-metadata
  • 68-add-words
  • 62-fix-get-image-when-word-with-accent
  • 44-implement-game-write-word-from-letters
  • 43-add-script-to-get-images-from-assets
  • 32-add-accents-and-diacritics-in-french-words-2
  • Release_0.10.2_82 protected
  • Release_0.10.1_81 protected
  • Release_0.10.0_80 protected
  • Release_0.9.2_79 protected
  • Release_0.9.1_78 protected
  • Release_0.9.0_77 protected
  • Release_0.8.0_76 protected
  • Release_0.7.0_75 protected
  • Release_0.6.0_74 protected
  • Release_0.5.2_73 protected
  • Release_0.5.1_72 protected
  • Release_0.5.0_71 protected
  • Release_0.4.1_70 protected
  • Release_0.4.0_69 protected
  • Release_0.3.1_68 protected
  • Release_0.3.0_67 protected
  • Release_0.2.1_66 protected
  • Release_0.2.0_65 protected
  • Release_0.1.40_64 protected
  • Release_0.1.39_63 protected
28 results

01_download_images.sh

Blame
  • 01_download_images.sh 3.19 KiB
    #!/usr/bin/env bash
    #
    # Downloads thumbnail images for a random selection of words read from
    # words.csv. Each word is searched once per keyword variant and the images
    # found are stored as PNG files under cache/download/<word>/.

    set -o errexit
    set -o nounset
    set -o pipefail

    # Fail fast when a required external tool is missing. curl, wget and md5sum
    # are checked as well: without them the script would otherwise only fail
    # part-way through, or silently report that no image was found.
    command -v convert >/dev/null 2>&1 || { echo >&2 "I require convert (imagemagick) but it's not installed. Aborting."; exit 1; }
    # NOTE(review): jq is not invoked in the visible part of this script;
    # confirm whether this requirement is still needed.
    command -v jq >/dev/null 2>&1 || { echo >&2 "I require jq (json parser) but it's not installed. Aborting."; exit 1; }
    command -v curl >/dev/null 2>&1 || { echo >&2 "I require curl but it's not installed. Aborting."; exit 1; }
    command -v wget >/dev/null 2>&1 || { echo >&2 "I require wget but it's not installed. Aborting."; exit 1; }
    command -v md5sum >/dev/null 2>&1 || { echo >&2 "I require md5sum (coreutils) but it's not installed. Aborting."; exit 1; }

    # Directory containing this script, and its parent directory.
    CURRENT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" >/dev/null 2>&1 && pwd)"
    BASE_DIR="$(dirname "${CURRENT_DIR}")"

    # CSV source file for words (the first ';'-separated field is the word)
    SOURCE_CSV_FILE="${CURRENT_DIR}/words.csv"
    # Upper bound on the number of randomly picked words handled per run
    MAX_RANDOM_WORDS_TO_DOWNLOAD=100
    # Upper bound on the number of thumbnails kept per search query
    MAX_IMAGES_PER_WORD=5

    # Images variants: each entry is appended to the word to build one search
    # query. The leading comma yields an empty first variant, i.e. a search for
    # the bare word.
    KEYWORD_VARIANTS=",image,picture,drawing,black and white,painting,icon"

    # Downloaded images are cached here, one sub-folder per word.
    IMAGES_CACHE_FOLDER="${CURRENT_DIR}/cache/download"
    mkdir -p "${IMAGES_CACHE_FOLDER}"

    # One variant per line, so it can be consumed with "while read".
    KEYWORD_VARIANTS_LIST="$(echo "${KEYWORD_VARIANTS}" | tr "," "\n")"
    
    #######################################
    # Looks up image thumbnails for a search query on DuckDuckGo.
    # Globals:   MAX_IMAGES_PER_WORD (read)
    # Arguments: $1 - free-text query; may contain spaces, '&' or accented
    #                 characters
    # Outputs:   at most MAX_IMAGES_PER_WORD thumbnail URLs on stdout, one per
    #            line; nothing when no result could be extracted
    # Returns:   0 always (the lookup is best-effort; callers test for an
    #            empty result)
    #######################################
    get_images_from_ddg() {
      local -r QUERY_STRING="$1"
      local -r BASE_URL="https://duckduckgo.com"

      # Let curl percent-encode the query instead of pasting it raw into the
      # URL: variants such as "black and white" contain spaces, and words may
      # contain '&' or non-ASCII letters.
      # Declaration and assignment are split so a failure is handled
      # explicitly ("|| true") rather than masked by "local".
      local RESULT
      RESULT="$(curl --silent --get \
        --data "t=ffab" \
        --data-urlencode "q=${QUERY_STRING}" \
        --data "atb=v301-1" \
        --data "iax=images" \
        --data "ia=images" \
        "${BASE_URL}/")" || true

      # The results page references its data endpoint in an "initialize" call
      # as a quoted "/d.js?..." path; swapping d.js for i.js gives the path
      # that is queried for the thumbnails below.
      local JSON_PATH
      JSON_PATH="$(printf '%s\n' "${RESULT}" \
        | sed 's/ /\n/g' \
        | grep "initialize" \
        | cut -d';' -f2 \
        | cut -d "'" -f2 \
        | sed 's/\/d\.js/\/i.js/g')" || true

      # Without an endpoint path there is nothing to query: fetching the bare
      # base URL would only return the home page.
      if [[ -z "${JSON_PATH}" ]]; then
        return 0
      fi

      local RESULTS
      RESULTS="$(curl --silent "${BASE_URL}${JSON_PATH}")" || true

      # Match each "thumbnail" key/value pair as a whole, so URLs containing
      # commas are kept intact and the key's position in its object does not
      # matter. "|| true": no match, or head closing the pipe early, must not
      # count as a failure under pipefail.
      printf '%s\n' "${RESULTS}" \
        | grep -oE '"thumbnail":[[:space:]]*"[^"]*"' \
        | cut -d '"' -f4 \
        | head -n "${MAX_IMAGES_PER_WORD}" || true
    }
    
    #######################################
    # Downloads one image and stores it as PNG in the cache folder of a word.
    # Globals:   IMAGES_CACHE_FOLDER (read)
    # Arguments: $1 - word the image belongs to (name of the cache sub-folder)
    #            $2 - URL of the image to download
    # Outputs:   progress messages on stdout, failure notices on stderr
    # Returns:   0 always; a failed download or conversion is reported and
    #            skipped so that one bad URL does not abort the whole run
    #######################################
    download_image() {
      local -r keyword="$1"
      local -r url="$2"

      # The file name is the MD5 of the URL followed by a newline.
      local url_hash
      url_hash="$(printf '%s\n' "${url}" | md5sum | awk '{print $1}')"

      local -r output_file="${IMAGES_CACHE_FOLDER}/${keyword}/${url_hash}.png"
      local -r tmp_image_file="${output_file}.tmp.png"
      echo "  OUTPUT_FILE: ${output_file#"${IMAGES_CACHE_FOLDER}/"}"

      # Drop any stale temporary file from a previous run.
      rm -f -- "${tmp_image_file}"

      if [[ -f "${output_file}" ]]; then
        echo "   - Already downloaded"
        return 0
      fi

      mkdir -p -- "$(dirname -- "${output_file}")"

      echo "   + Downloading..."
      # Rely on wget's exit status: "-O" creates the target file even when the
      # download fails, so checking for the file's existence proves nothing.
      if wget --quiet --timeout=10 "${url}" -O "${tmp_image_file}"; then
        echo "   + Converting..."
        if ! convert "${tmp_image_file}" "${output_file}"; then
          echo "   ! Conversion failed, skipping" >&2
          # Do not keep a partial result that a later run would mistake for
          # an already downloaded image.
          rm -f -- "${output_file}"
        fi
      else
        echo "   ! Download failed, skipping" >&2
      fi

      rm -f -- "${tmp_image_file}"
    }

    # Pick a random selection of distinct words (first CSV field). shuf stops
    # reading nothing early, unlike "sort -R | head", which can die from
    # SIGPIPE and abort the script under pipefail + errexit.
    WORDS_LIST="$(cut -d';' -f1 "${SOURCE_CSV_FILE}" | sort -u | shuf -n "${MAX_RANDOM_WORDS_TO_DOWNLOAD}")"

    while read -r KEYWORD; do
      [[ -n "${KEYWORD}" ]] || continue
      echo "KEYWORD: ${KEYWORD}"

      while read -r VARIANT; do
        echo " VARIANT: ${VARIANT}"

        # An empty variant means searching for the bare word.
        QUERY_STRING="${KEYWORD}"
        if [[ -n "${VARIANT}" ]]; then
          QUERY_STRING="${QUERY_STRING} ${VARIANT}"
        fi

        URL_LIST="$(get_images_from_ddg "${QUERY_STRING}")"

        if [[ -z "${URL_LIST}" ]]; then
          echo "    No image found..."
          continue
        fi

        while read -r URL; do
          [[ -n "${URL}" ]] || continue
          download_image "${KEYWORD}" "${URL}"
        done <<<"${URL_LIST}"
      done <<<"${KEYWORD_VARIANTS_LIST}"
    done <<<"${WORDS_LIST}"