# Last active: 1766739780

# Revision: da9fe99ebc2cd91125a3115b8ac52385cfbda7bb

# File: describe-image.sh (Raw)
#!/usr/bin/env bash
#
# describe-image.sh - describe an image with an Ollama model and optionally
# translate the description into another language.
#
# Usage:
#   describe-image.sh <image> [target_language]
#
# Optional environment overrides, e.g.
#   WORDS=200 MODE=extended describe-image.sh image.jpg
#
#   MODEL              model that describes the image      (default: qwen3-vl:8b)
#   MODE               prompt style: simple | extended     (default: simple)
#   WORDS              word budget for the extended prompt (default: 100)
#   TRANSLATION_MODEL  model that translates the result    (default: llama3.2)
#
# The description (or its translation) is written to stdout. Usage and error
# messages are written to stderr and the script exits with status 1.

set -o errexit
set -o pipefail
set -o nounset

# Default configuration; each value can be overridden from the environment.
MODEL=${MODEL:-qwen3-vl:8b}
MODE=${MODE:-simple}
WORDS=${WORDS:-100}
TRANSLATION_MODEL=${TRANSLATION_MODEL:-llama3.2}

#######################################
# Print an error message to stderr and abort the script.
# Arguments: $* - message text (printed after an "Error: " prefix)
# Returns:   never; exits with status 1
#######################################
die() {
  printf 'Error: %s\n' "$*" >&2
  exit 1
}

#######################################
# Print the usage text to stderr.
#######################################
usage() {
  printf 'Usage: %s <image> [target_language]\n' "$0" >&2
  printf 'Example: %s screenshot.png de\n' "$0" >&2
}

#######################################
# Resolve a path to an absolute one.
# Uses realpath when it is installed; otherwise resolves the parent directory
# with cd/pwd (in that fallback a symlinked file name itself is not resolved).
# Arguments: $1 - path to resolve
# Outputs:   the absolute path on stdout
# Returns:   non-zero when the path cannot be resolved
#######################################
absolute_path() {
  local target=$1 parent
  # A leading '-' would otherwise be parsed as an option by the tools below.
  if [[ "$target" == -* ]]; then
    target="./$target"
  fi
  if command -v realpath > /dev/null 2>&1; then
    realpath "$target"
    return
  fi
  # CDPATH is cleared so cd neither searches elsewhere nor prints the directory.
  parent=$(CDPATH= cd "$(dirname "$target")" && pwd -P) || return 1
  printf '%s/%s\n' "${parent%/}" "$(basename "$target")"
}

#######################################
# Print the prompt used in MODE=extended.
# Arguments: $1 - word budget
#            $2 - absolute path of the image
# Outputs:   the prompt text on stdout
#######################################
extended_prompt() {
  local words=$1 image=$2
  cat <<EOT
Please provide a functional, objective description of the provided image in no more than around ${words} words so that someone who could not see it would be able to imagine it. If possible, follow an “object-action-context” framework. The object is the main focus. The action describes what’s happening, usually what the object is doing. The context describes the surrounding environment.

If there is text found in the image, do your best to transcribe the important bits, even if it extends the word count beyond ${words} words. It should not contain quotation marks, as those tend to cause issues when rendered on the web.

If there is no text found in the image, then there is no need to mention it.

You should not begin the description with any variation of “The image”.

Don't output the thinking process, just the final description.

${image}
EOT
}

#######################################
# Entry point: validate input, describe the image, optionally translate.
# Globals:   MODEL, MODE, WORDS, TRANSLATION_MODEL (read)
# Arguments: $1 - image file
#            $2 - optional target language for the translation
# Outputs:   description or translation on stdout, diagnostics on stderr
#######################################
main() {
  # Check dependencies
  command -v ollama > /dev/null 2>&1 \
    || die "'ollama' is not installed or not in PATH."

  if (( $# < 1 || $# > 2 )); then
    usage
    exit 1
  fi

  # Test for existence before resolving, so a missing file always produces
  # this message instead of an error from the path-resolution tool.
  [[ -f "$1" ]] || die "File '$1' not found."

  local image_file
  image_file=$(absolute_path "$1") || die "Could not resolve path of '$1'."

  [[ "$WORDS" =~ ^[0-9]+$ ]] || die "WORDS must be numeric."

  case "$MODE" in
    simple | extended) ;;
    *) die "MODE must be either 'simple' or 'extended'." ;;
  esac

  # Validate the language before the (slow) description call is made.
  local target_language=""
  if (( $# == 2 )); then
    [[ -n "$2" ]] || die "target_language must not be empty."
    target_language=$2
    # Map the 'de' shortcode (any letter case) to the full language name for
    # better prompt understanding; other values are passed through unchanged.
    case "$target_language" in
      [dD][eE]) target_language="German" ;;
    esac
  fi

  # The image path is embedded in the prompt text handed to `ollama run`.
  # NOTE(review): presumably this is how the ollama CLI picks up the image;
  # confirm against the Ollama documentation.
  local prompt="describe image ${image_file}"
  if [[ "$MODE" == "extended" ]]; then
    prompt=$(extended_prompt "$WORDS" "$image_file")
  fi

  local description
  description=$(ollama run "$MODEL" "$prompt") \
    || die "ollama failed to generate description."

  if [[ -z "$target_language" ]]; then
    printf '%s\n' "$description"
    return 0
  fi

  # printf expands \n in its format string, so the instruction and the text
  # are separated by a real blank line (a plain "..." string would carry the
  # two characters backslash and n instead).
  local translation_prompt translation
  printf -v translation_prompt '%s\n\n%s' \
    "Translate the following text to ${target_language}. Do not add any conversational filler, just provide the translation:" \
    "$description"

  translation=$(ollama run "$TRANSLATION_MODEL" "$translation_prompt") \
    || die "ollama failed to translate description."

  printf '%s\n' "$translation"
}

main "$@"