describe-image.sh düzenlemesi

mjaschen bu gisti düzenledi 1766739780. Düzenlemeye git

1 file changed, 81 insertions

describe-image.sh(dosya oluşturuldu)

		@@ -0,0 +1,81 @@
#!/usr/bin/env bash
#
# describe-image.sh - ask a local ollama vision model to describe an image and,
# optionally, translate that description with a second model.
#
# Usage:
#   describe-image.sh <image> [target_language]
#
# Environment overrides (defaults in parentheses):
#   MODEL              model used for the description (qwen3-vl:8b)
#   MODE               prompt variant: simple or extended (simple)
#   WORDS              approximate word limit used by the extended prompt (100)
#   TRANSLATION_MODEL  model used for the translation (llama3.2)
#
# Example:
#   WORDS=200 MODE=extended describe-image.sh image.jpg de
#
# Output: the (optionally translated) description on stdout; all diagnostics
# on stderr. Exit status is 0 on success and 1 on any error.

set -o errexit
set -o pipefail
set -o nounset

# Print every argument on its own line to stderr, then abort with status 1.
die() {
  printf '%s\n' "$@" >&2
  exit 1
}

# Check dependencies
if ! command -v ollama > /dev/null 2>&1; then
  die "Error: 'ollama' is not installed or not in PATH."
fi

if [[ "$#" -ne 1 && "$#" -ne 2 ]]; then
  die "Usage: $0 <image> [target_language]" \
      "Example: $0 screenshot.png de"
fi

# Resolve to an absolute path because the path is embedded in the prompt.
# realpath's own failure (missing tool, or a missing file on some platforms)
# is deliberately silenced: we fall back to the raw argument so the explicit
# check below reports the problem instead of errexit killing the script.
IMAGE_FILE=$(realpath -- "$1" 2> /dev/null) || IMAGE_FILE=$1

if [[ ! -f "$IMAGE_FILE" ]]; then
  die "Error: File '$IMAGE_FILE' not found."
fi

# Default configuration
# pass variables to override parameters, e.g.:
#   WORDS=200 MODE=extended describe-image.sh image.jpg
MODEL=${MODEL:-qwen3-vl:8b}
MODE=${MODE:-simple}
WORDS=${WORDS:-100}
TRANSLATION_MODEL=${TRANSLATION_MODEL:-llama3.2}

if ! [[ "$WORDS" =~ ^[0-9]+$ ]]; then
  die "Error: WORDS must be numeric."
fi

if [[ "$MODE" != "simple" && "$MODE" != "extended" ]]; then
  die "Error: MODE must be either 'simple' or 'extended'."
fi

# The image is handed to the model by mentioning its path in the prompt text.
# NOTE(review): this relies on the ollama CLI picking image paths out of the
# prompt for multimodal models - confirm against the installed ollama version.
PROMPT_ACTUAL="describe image ${IMAGE_FILE}"

if [[ "$MODE" == "extended" ]]; then
  # Plain multi-line assignment instead of $(cat <<EOT ...): no subprocess,
  # and no here-document nested inside a command substitution.
  PROMPT_ACTUAL="Please provide a functional, objective description of the provided image in no more than around ${WORDS} words so that someone who could not see it would be able to imagine it. If possible, follow an “object-action-context” framework. The object is the main focus. The action describes what’s happening, usually what the object is doing. The context describes the surrounding environment.

If there is text found in the image, do your best to transcribe the important bits, even if it extends the word count beyond ${WORDS} words. It should not contain quotation marks, as those tend to cause issues when rendered on the web.

If there is no text found in the image, then there is no need to mention it.

You should not begin the description with any variation of “The image”.

Don't output the thinking process, just the final description.

${IMAGE_FILE}"
fi

RESULT=$(ollama run "$MODEL" "$PROMPT_ACTUAL") \
  || die "Error: ollama failed to generate description."

# Without a target language the description itself is the final output.
if [[ "$#" -eq 1 ]]; then
  # printf rather than echo: model output starting with -n/-e must not be
  # interpreted as an echo option.
  printf '%s\n' "$RESULT"
  exit 0
fi

# If a second argument is provided, treat it as the target language
TARGET_LANGUAGE="$2"

# Map 'de' shortcode to full name for better prompt understanding, optional for others
if [[ "$TARGET_LANGUAGE" == "de" || "$TARGET_LANGUAGE" == "DE" ]]; then
  TARGET_LANGUAGE="German"
fi

# $'\n\n' yields real newlines; "\n\n" inside double quotes would reach the
# model as literal backslash-n characters.
TRANSLATION_PROMPT="Translate the following text to ${TARGET_LANGUAGE}. Do not add any conversational filler, just provide the translation:"$'\n\n'"${RESULT}"

TRANSLATED_RESULT=$(ollama run "$TRANSLATION_MODEL" "$TRANSLATION_PROMPT") \
  || die "Error: ollama failed to translate description."

printf '%s\n' "$TRANSLATED_RESULT"

Daha yeni Daha eski