Dernière activité 1766739780

mjaschen a révisé ce gist 1766739780. Aller à la révision

1 file changed, 81 insertions

describe-image.sh(fichier créé)

@@ -0,0 +1,81 @@
#!/usr/bin/env bash
#
# describe-image.sh - describe an image with a local Ollama vision model and
# optionally translate the description with a second model.
#
# Usage:
#   describe-image.sh <image> [target_language]
#
# Arguments:
#   image            path to the image file to describe
#   target_language  optional; when given, the description is translated into
#                    this language ('de'/'DE' is expanded to 'German')
#
# Environment overrides (all optional):
#   MODEL              model used for the description   (default: qwen3-vl:8b)
#   MODE               'simple' or 'extended' prompt    (default: simple)
#   WORDS              word budget for extended mode    (default: 100)
#   TRANSLATION_MODEL  model used for the translation   (default: llama3.2)
#
# Example:
#   WORDS=200 MODE=extended describe-image.sh image.jpg de
#
# Output:
#   The (possibly translated) description on stdout; all diagnostics on
#   stderr. Exits 1 on any error.

set -o errexit
set -o pipefail
set -o nounset

# Print a message to stderr and abort with status 1.
die() {
  printf '%s\n' "$*" >&2
  exit 1
}

# Check dependencies
if ! command -v ollama >/dev/null 2>&1; then
  die "Error: 'ollama' is not installed or not in PATH."
fi

if [[ "$#" -ne 1 && "$#" -ne 2 ]]; then
  printf '%s\n' \
    "Usage: $0 <image> [target_language]" \
    "Example: $0 screenshot.png de" >&2
  exit 1
fi

# Verify the file exists *before* resolving it: some realpath implementations
# fail on missing paths, which under errexit would abort the script without
# the friendly message below.
if [[ ! -f "$1" ]]; then
  die "Error: File '$1' not found."
fi

IMAGE_FILE=$(realpath -- "$1") || die "Error: could not resolve path of '$1'."

# Default configuration
# pass variables to override parameters, e.g.:
#   WORDS=200 MODE=extended describe-image.sh image.jpg
MODEL=${MODEL:-qwen3-vl:8b}
MODE=${MODE:-simple}
WORDS=${WORDS:-100}
TRANSLATION_MODEL=${TRANSLATION_MODEL:-llama3.2}

if ! [[ "$WORDS" =~ ^[0-9]+$ ]]; then
  die "Error: WORDS must be numeric."
fi

# Build the prompt for the selected mode. The image is handed to the model by
# embedding its absolute path in the prompt text (the ollama CLI is expected
# to pick the file up from there).
case "$MODE" in
  simple)
    PROMPT_ACTUAL="describe image ${IMAGE_FILE}"
    ;;
  extended)
    # Plain multi-line string instead of $(cat <<EOT ...): no extra process
    # and no heredoc-inside-command-substitution parsing pitfalls.
    PROMPT_ACTUAL="Please provide a functional, objective description of the provided image in no more than around ${WORDS} words so that someone who could not see it would be able to imagine it. If possible, follow an “object-action-context” framework. The object is the main focus. The action describes what’s happening, usually what the object is doing. The context describes the surrounding environment.

If there is text found in the image, do your best to transcribe the important bits, even if it extends the word count beyond ${WORDS} words. It should not contain quotation marks, as those tend to cause issues when rendered on the web.

If there is no text found in the image, then there is no need to mention it.

You should not begin the description with any variation of “The image”.

Don't output the thinking process, just the final description.

${IMAGE_FILE}"
    ;;
  *)
    die "Error: MODE must be either 'simple' or 'extended'."
    ;;
esac

RESULT=$(ollama run "$MODEL" "$PROMPT_ACTUAL") \
  || die "Error: ollama failed to generate description."

# Without a target language the description itself is the final output.
if [[ "$#" -eq 1 ]]; then
  printf '%s\n' "$RESULT"
  exit 0
fi

# If a second argument is provided, treat it as the target language
TARGET_LANGUAGE="$2"

# Map 'de' shortcode to full name for better prompt understanding, optional
# for others
case "$TARGET_LANGUAGE" in
  de | DE) TARGET_LANGUAGE="German" ;;
esac

# Real newlines separate the instruction from the text: "\n" inside double
# quotes would be passed to the model as a literal backslash followed by 'n'.
TRANSLATION_PROMPT="Translate the following text to ${TARGET_LANGUAGE}. Do not add any conversational filler, just provide the translation:

${RESULT}"

TRANSLATED_RESULT=$(ollama run "$TRANSLATION_MODEL" "$TRANSLATION_PROMPT") \
  || die "Error: ollama failed to translate description."

printf '%s\n' "$TRANSLATED_RESULT"
Plus récent Plus ancien