mjaschen bu gisti düzenledi . Düzenlemeye git
1 file changed, 81 insertions
describe-image.sh(dosya oluşturuldu)
| @@ -0,0 +1,81 @@ | |||
#!/usr/bin/env bash
#
# describe-image.sh - describe an image with a local ollama vision model and
# optionally translate the description with a second ollama model.
#
# Usage:
#   describe-image.sh <image> [target_language]
#
# Environment overrides (all optional):
#   MODEL              model used for the description (default: qwen3-vl:8b)
#   MODE               'simple' or 'extended' prompt  (default: simple)
#   WORDS              word budget for extended mode  (default: 100)
#   TRANSLATION_MODEL  model used for the translation (default: llama3.2)
#
# Example:
#   WORDS=200 MODE=extended describe-image.sh image.jpg de
#
# The description (or its translation) is written to stdout; every diagnostic
# goes to stderr so that captured output stays clean. Exits 1 on any usage,
# validation or ollama failure.

set -o errexit
set -o pipefail
set -o nounset

# Print an error message to stderr and abort with exit status 1.
die() {
  printf 'Error: %s\n' "$*" >&2
  exit 1
}

# Check dependencies
if ! command -v ollama &> /dev/null; then
  die "'ollama' is not installed or not in PATH."
fi

if [[ "$#" -ne 1 && "$#" -ne 2 ]]; then
  {
    echo "Usage: $0 <image> [target_language]"
    echo "Example: $0 screenshot.png de"
  } >&2
  exit 1
fi

# Test for existence *before* resolving the path: some realpath
# implementations fail on missing files, which under errexit would abort the
# script with a raw tool error instead of the message below.
if [[ ! -f "$1" ]]; then
  die "File '$1' not found."
fi

# '--' keeps a file name that starts with '-' from being parsed as an option.
IMAGE_FILE=$(realpath -- "$1") || die "Cannot resolve path of '$1'."

# Default configuration
# pass variables to override parameters, e.g.: WORDS=200 MODE=extended describe-image.sh image.jpg
MODEL=${MODEL:-qwen3-vl:8b}
MODE=${MODE:-simple}
WORDS=${WORDS:-100}
TRANSLATION_MODEL=${TRANSLATION_MODEL:-llama3.2}

if ! [[ "$WORDS" =~ ^[0-9]+$ ]]; then
  die "WORDS must be numeric."
fi

if [[ "$MODE" != "simple" && "$MODE" != "extended" ]]; then
  die "MODE must be either 'simple' or 'extended'."
fi

# Validate the optional target language up front so that a bad value is
# reported before the (slow) description model is invoked.
TARGET_LANGUAGE=${2:-}
if [[ "$#" -eq 2 && -z "$TARGET_LANGUAGE" ]]; then
  die "target_language must not be empty."
fi

# Map 'de' shortcode to full name for better prompt understanding, optional for others
case "$TARGET_LANGUAGE" in
  de | DE) TARGET_LANGUAGE="German" ;;
esac

# Build only the prompt that is actually going to be sent. The image path is
# embedded in the prompt text; NOTE(review): this presumably relies on ollama
# picking up image paths from the prompt - confirm against the ollama docs.
if [[ "$MODE" == "extended" ]]; then
  PROMPT_ACTUAL="Please provide a functional, objective description of the provided image in no more than around ${WORDS} words so that someone who could not see it would be able to imagine it. If possible, follow an “object-action-context” framework. The object is the main focus. The action describes what’s happening, usually what the object is doing. The context describes the surrounding environment.

If there is text found in the image, do your best to transcribe the important bits, even if it extends the word count beyond ${WORDS} words. It should not contain quotation marks, as those tend to cause issues when rendered on the web.

If there is no text found in the image, then there is no need to mention it.

You should not begin the description with any variation of “The image”.

Don't output the thinking process, just the final description.

${IMAGE_FILE}"
else
  PROMPT_ACTUAL="describe image ${IMAGE_FILE}"
fi

RESULT=$(ollama run "$MODEL" "$PROMPT_ACTUAL") || die "ollama failed to generate description."

# Without a target language the description itself is the final output.
if [[ "$#" -eq 1 ]]; then
  printf '%s\n' "$RESULT"
  exit 0
fi

# printf turns the format's \n\n into real newlines; inside a plain
# double-quoted string they would reach the model as literal backslash-n.
printf -v TRANSLATION_PROMPT '%s\n\n%s' \
  "Translate the following text to ${TARGET_LANGUAGE}. Do not add any conversational filler, just provide the translation:" \
  "$RESULT"

TRANSLATED_RESULT=$(ollama run "$TRANSLATION_MODEL" "$TRANSLATION_PROMPT") || die "ollama failed to translate description."

printf '%s\n' "$TRANSLATED_RESULT"
Daha yeni
Daha eski