#!/bin/bash
# Stop early when the FISH_VERSION marker variable is non-empty and tell the
# user to run the script through bash (or via 'bass') instead.
if [[ -n "${FISH_VERSION:-}" ]]; then
  printf '%s\n' "❌ FEHLER: Bitte starte das Script mit 'bash XAIGPUARC.sh' oder installiere 'bass'."
  exit 1
fi
#| XAIGPUARC |✅|
#ATTENTION marks the places in XAIGPUARC that you may change, such as the model, the prompt and the context size (CTX)...
#| Das ist ein Ein Klick Programm:
#| Am Ende dieses Programmes finden sie Testbeispiele|✅|
#|Deutsch-Mathematik-Formel-Sprachprogramm|
#|04.05.2026|TIME|20:10|
#|GEHIRN-O-MAT + EIWEISS-COMPUTER = Sprachprogramm|
#9.)How to START your XAIGPUARC
#0.)FIRST|||install the INTEL-ONE-API-BASEKIT-Toolkit!!!
#0.)Second: the best case is to use ARCH|Garuda|LINUX
#1.)Copy|XAIGPUARC.sh|into your|Home/PCNAME|folder
#2.)While XAIGPUARC installs you can download a .gguf|F16|AI model that fits your
#a.)V|RAM|into the|/models/HereAINAME|folder, i.e. your|Home/PCNAME/models/HereAINAME|folder
#b.)The standard model is: MathTutor-7B-H_v0.0.1.f16!!!
#7.)IF ANYTHING STOPS WITH A MEMORY ERROR, LOWER THE CONTEXT SIZE (CTX)!!! 20480 standard
#yay -S intel-oneapi-base-toolkit
#sudo pacman -Rns intel-compute-runtime intel-level-zero-gpu oneapi-level-zero
#pakemanager oneapi toolkit version install
#bass source /opt/intel/oneapi/setvars.sh --force
#sudo pacman -S intel-compute-runtime level-zero-loader intel-opencl-clang ocl-icd
#sudo mv /etc/OpenCL/vendors/intel64.icd /etc/OpenCL/vendors/intel64.icd.disabled
#export OCL_ICD_VENDORS=/etc/OpenCL/vendors/intel.icd
#Test that your SYCL device is working, for example on an A770LE:
#sycl-ls && clinfo | grep -i "Arc A770"
#Check and start
#Change|your own model in this text file|twice|below!!
#Open your fish or bash console|type: chmod +x ./XAIGPUARC.sh and press Enter...
#START|by typing|in the console: ./XAIGPUARC.sh...
# Strict mode: abort on unhandled errors, on unset variables and on failures
# inside pipelines; restrict word splitting to newline and tab.
set -euo pipefail
IFS=$'\n\t'
PRECISION="FP16"
DEVICE="ARC"
LLAMA_CPP_DIR="llama.cpp"
# BUILD_DIR may be overridden from the environment; a trailing slash is removed.
BUILD_DIR="${BUILD_DIR:-XAIGPUARC}"
BUILD_DIR="${BUILD_DIR%/}"
#|XAIGPUARC|
# Fixed: the value previously ended in a stray '}' and pointed at no real file.
GGML_SYCL_CPP="${LLAMA_CPP_DIR}/ggml/src/ggml-sycl/ggml-sycl.cpp"
CMAKE_BUILD_TYPE="${CMAKE_BUILD_TYPE:-Release}"
NPROC="${NPROC:-$(nproc)}"
# Fixed: stray '}' removed, and the name now matches the file that
# compile_project actually writes inside BUILD_DIR (build.log).
LOG_FILE="${BUILD_DIR}/build.log"
# Paths of the built tools, relative to BUILD_DIR.
LLAMA_CLI_PATH="bin/llama-cli"
LS_SYCL_DEVICE_PATH="bin/llama-ls-sycl-device"
ADD_SUBDIR_LINE="${LLAMA_CPP_DIR}/ggml/src/ggml-sycl/ggml-sycl.cpp"
#|ONEAPI SETTINGS
# Environment variables handed to the oneAPI / SYCL / Level Zero tooling.
# Values are assigned first and exported together at the end.
LIBVA_DRIVER_NAME='iHD'
TCM_ROOT="${TCM_ROOT:-/opt/intel/oneapi/tcm/latest/}"
SYCL_CACHE_PERSISTENT='0'
OCL_ICD_FILENAMES=''
ZES_ENABLE_SYSMAN='0'
OverrideDefaultFP64Settings='0'
CCACHE_DIR="${HOME}/.ccache"
COMPILER_VERSION='2025.3'
SYCL_PI_LEVEL_ZERO_USE_IMMEDIATE_COMMANDLISTS='1'
SYCL_PI_LEVEL_ZERO_BATCH_SIZE='256'
FP_FLAG='FP16'
ONEAPI_DEVICE_SELECTOR='level_zero:*'
SYCL_DEVICE_FILTER='level_zero:gpu'
ZE_ENABLE_PCI_ID_DEVICE_ORDER='1'
export LIBVA_DRIVER_NAME TCM_ROOT SYCL_CACHE_PERSISTENT OCL_ICD_FILENAMES
export ZES_ENABLE_SYSMAN OverrideDefaultFP64Settings CCACHE_DIR COMPILER_VERSION
export SYCL_PI_LEVEL_ZERO_USE_IMMEDIATE_COMMANDLISTS SYCL_PI_LEVEL_ZERO_BATCH_SIZE
export FP_FLAG ONEAPI_DEVICE_SELECTOR SYCL_DEVICE_FILTER ZE_ENABLE_PCI_ID_DEVICE_ORDER
#|DRIVER COMPATIBILITY: INTEL COMPUTE RUNTIME 26+
# Print the major version of the pacman package intel-compute-runtime, or 0
# when pacman is unavailable, the package is not installed, or the version
# does not start with a number. Always returns 0, so the caller cannot be
# aborted by errexit/pipefail (the old inline pipeline ended the whole script
# on systems without the package).
_xai_compute_runtime_major() {
  local raw="" major=""
  if command -v pacman >/dev/null 2>&1; then
    raw=$(pacman -Q intel-compute-runtime 2>/dev/null) || raw=""
  fi
  raw="${raw#* }"   # drop the package name ("name version")
  raw="${raw#*:}"   # drop an optional epoch prefix ("1:26.05...")
  major="${raw%%.*}"
  case "$major" in
    '' | *[!0-9]*) major=0 ;;
  esac
  printf '%s\n' "$major"
}
CR_VERSION=$(_xai_compute_runtime_major)
if [ "${CR_VERSION:-0}" -ge 26 ]; then
  export SYCL_PI_LEVEL_ZERO_USE_IMMEDIATE_COMMANDLISTS=0
  export GGML_SYCL_DISABLE_OPT=1
  echo "🔷INTEL COMPUTE RUNTIME 26+ ERKANNT KOMPATIBILITAETSMODUS AKTIV"
fi
#|HELPER FUNCTIONS
# Each helper prints its arguments behind a status symbol on stdout.
# Fixed: the format argument was the literal string "$", so every message was
# replaced by a dollar sign; "$*" passes the actual message text.
log() { printf "🔷 %s\n" "$*"; }
success() { printf "✅ %s\n" "$*"; }
# error() ends with an extra blank line, as before.
error() { printf "❌ %s\n\n" "$*"; }
warn() { printf "⚠️ %s\n" "$*"; }
#|INTERNET CHECK
# Try to open a TCP connection to 8.8.8.8 port 53 within five seconds.
# Returns 0 when the connection opens, 1 otherwise.
check_internet() {
  log "🔷PRUEFE INTERNETZ VERBINDUNG..."
  if ! timeout 5 bash -c "</dev/tcp/8.8.8.8/53" 2>/dev/null; then
    warn "⚠️KEINE INTERNETZ VERBINDUNG! ERSTINSTALLATION ONLINE ABHAENGIGKEITEN! ANSCHLUSS PRUEFEN"
    return 1
  fi
  success "✅INTERNETZ VERBINDUNG VORHANDEN"
  return 0
}
#|PREPARE ENVIRONMENT|FALLBACK MECHANISMS
# Load the oneAPI environment and export compiler and library locations.
# Globals written: DPCPP_ROOT, MKL_ROOT, ONEAPI_ROOT, CC, CXX, FC, CPATH,
#                  LD_LIBRARY_PATH (all exported).
# Globals read:    COMPILER_VERSION, BUILD_DIR.
# Exits the script with status 1 when setvars.sh is missing or when icx/icpx
# are not on PATH afterwards.
prepare_environment() {
  log "🔷HOLE ONEAPI KOPFZEILEN XAIGPUARC"
  local SETVARS_PATH="/opt/intel/oneapi/setvars.sh"
  if [ ! -f "$SETVARS_PATH" ]; then
    error "❌ONE API KOEPFZEILEN NICHT GEFUNDEN $SETVARS_PATH INSTALLIERE ONEAPI BIBLIOTHEKEN"
    exit 1
  fi
  log "🔷SETVARS SETZEN + SUCHEN SS+S"
  # setvars.sh is an external script; assumption: it is not written for
  # errexit/nounset, so both are switched off while it runs and restored after.
  local restore_e=0 restore_u=0 setvars_rc=0
  case "$-" in *e*) restore_e=1 ;; esac
  case "$-" in *u*) restore_u=1 ;; esac
  set +e +u
  # shellcheck disable=SC1090
  source "$SETVARS_PATH" --force 2>/dev/null
  setvars_rc=$?
  if [ "$restore_e" -eq 1 ]; then set -e; fi
  if [ "$restore_u" -eq 1 ]; then set -u; fi
  if [ "$setvars_rc" -ne 0 ]; then
    # Not fatal: the fallback paths below and the compiler check still apply.
    warn "⚠️SETVARS MELDET FEHLER ${setvars_rc} NUTZE RUECKFALLPFADE"
  fi
  local ONEAPI_ROOT_FALLBACK="/opt/intel/oneapi"
  local COMPILER_VERSION_FALLBACK="${COMPILER_VERSION:-2025.3}"
  DPCPP_ROOT="${DPCPP_ROOT:-${ONEAPI_ROOT_FALLBACK}/compiler/${COMPILER_VERSION_FALLBACK}}"
  MKL_ROOT="${MKL_ROOT:-${ONEAPI_ROOT_FALLBACK}/mkl/${COMPILER_VERSION_FALLBACK}}"
  ONEAPI_ROOT="${ONEAPI_ROOT:-${ONEAPI_ROOT_FALLBACK}}"
  export CC=icx
  export CXX=icpx
  export FC=ifx
  export DPCPP_ROOT
  export MKL_ROOT
  export ONEAPI_ROOT
  # Only add a separator when CPATH already has content, so no empty path
  # element is created.
  export CPATH="${CPATH:+${CPATH}:}${MKL_ROOT}/include"
  # Fixed: LD_LIBRARY_PATH takes directories; the old value named the file
  # libmkl_sycl.so itself.
  local LIB_DIR="/opt/intel/oneapi/mkl/latest/lib/intel64"
  export LD_LIBRARY_PATH="./${BUILD_DIR}/bin:${LIB_DIR}${LD_LIBRARY_PATH:+:${LD_LIBRARY_PATH}}"
  # Fixed: the old test passed "&>/dev/null" as a literal argument and had no
  # operator between the two checks.
  if ! command -v icx >/dev/null 2>&1 || ! command -v icpx >/dev/null 2>&1; then
    error "❌ICX IPCX ONE API XAIGPUARC UNTERMODUL INSTALLATION FEHLGESCHLAGEN"
    exit 1
  fi
  log "🔷VERBINDUNG ONEAPI GELADEN DPCPP${DPCPP_ROOT}MKL${MKL_ROOT}"
}
#1|PROJECT|PRE-BUILD
# Fetch or refresh the llama.cpp checkout in LLAMA_CPP_DIR.
# $1 - optional "--clean": delete LLAMA_CPP_DIR and BUILD_DIR first.
# Exits the script with status 1 when the checkout is missing after cloning
# or when the directory cannot be entered.
setup_project() {
  local mode="${1:-}"
  if [[ "$mode" == "--clean" ]]; then
    warn "⚠️ REINIGUNGSMODUS LOESCHE ALTE BAUSTEINE"
    rm -rf "${LLAMA_CPP_DIR}"
    rm -rf "${BUILD_DIR}"
    success "✅ REINIGUNG ABGESCHLOSSEN"
  fi
  log "🔷BEREITE XAIGPUARC VORBAU BITTE WARTEN"
  if [ ! -d "${LLAMA_CPP_DIR}" ]; then
    log "🔷ZIEHE BAUSTEINE AUS INTERNETZ"
    git clone https://github.com/ggerganov/llama.cpp "${LLAMA_CPP_DIR}"
    if [ ! -d "${LLAMA_CPP_DIR}" ]; then
      error "❌ZIEHEN DER BAUSTEINE FEHLGESCHLAGEN ABBRUCH"
      exit 1
    fi
  fi
  if ! pushd "${LLAMA_CPP_DIR}" > /dev/null; then
    error "❌FEHLER HAUPTVERZEICHNIS'${LLAMA_CPP_DIR}'NICHT GEFUNDEN"
    exit 1
  fi
  log "🔷AKTUALISIERE BAUSTEINE FUER UNTERMODULE"
  # Discard local modifications, then update the checkout and its submodules.
  git reset --hard HEAD
  git pull
  git submodule update --init --recursive
  popd > /dev/null
  success "✅LADE BAUSTEINE IN XAIGPUARC UNTERBAUMODULE"
}
#PATCH|LOGIC:1-8+a+b+c|
# Insert TEXT before or after every line of FILE that contains the literal
# string MARKER.
# $1 - file, $2 - "before" or "after", $3 - marker, $4 - text to insert
# Returns 0 when inserted, 2 when the marker is absent (file untouched),
# 1 on I/O errors. Values travel through the environment so that no
# character in them is interpreted as a pattern or escape sequence.
_xai_insert_line() {
  local file=$1 where=$2 marker=$3 text=$4
  local tmp="" rc=0
  grep -Fq -- "$marker" "$file" || return 2
  tmp=$(mktemp) || return 1
  XAI_WHERE="$where" XAI_MARKER="$marker" XAI_TEXT="$text" awk '
    {
      hit = (index($0, ENVIRON["XAI_MARKER"]) > 0)
      if (hit && ENVIRON["XAI_WHERE"] == "before") print ENVIRON["XAI_TEXT"]
      print
      if (hit && ENVIRON["XAI_WHERE"] == "after") print ENVIRON["XAI_TEXT"]
    }' "$file" > "$tmp" || rc=1
  if [ "$rc" -eq 0 ]; then
    # Rewrite in place so the target keeps its permissions.
    cat "$tmp" > "$file" || rc=1
  fi
  rm -f -- "$tmp"
  return "$rc"
}
# Replace every occurrence of the literal string SEARCH in FILE by the
# literal string REPLACEMENT.
# $1 - file, $2 - search text, $3 - replacement text
# Returns 0 when replaced, 2 when SEARCH is absent, 1 on errors.
_xai_replace_literal() {
  local file=$1 search=$2 replacement=$3
  local tmp="" rc=0
  [ -n "$search" ] || return 1
  grep -Fq -- "$search" "$file" || return 2
  tmp=$(mktemp) || return 1
  XAI_SEARCH="$search" XAI_REPL="$replacement" awk '
    BEGIN { s = ENVIRON["XAI_SEARCH"]; r = ENVIRON["XAI_REPL"]; n = length(s) }
    {
      out = ""; rest = $0
      while ((p = index(rest, s)) > 0) {
        out = out substr(rest, 1, p - 1) r
        rest = substr(rest, p + n)
      }
      print out rest
    }' "$file" > "$tmp" || rc=1
  if [ "$rc" -eq 0 ]; then
    cat "$tmp" > "$file" || rc=1
  fi
  rm -f -- "$tmp"
  return "$rc"
}
# Apply the XAIGPUARC source patches to the llama.cpp checkout.
# Globals read: LLAMA_CPP_DIR, MKL_ROOT, DPCPP_ROOT.
# Returns 0 on success and 1 when a required file is missing or a patch step
# fails. A missing insertion marker is reported with a warning and skipped.
# Fixes compared with the previous version:
#  - sed addresses of the form "|marker|a ..." are not valid sed; literal
#    insert/replace helpers are used instead
#  - CMake references such as ${GGML_SYCL_SOURCES} are single-quoted so the
#    shell no longer expands them (fatal under 'set -u')
#  - the registered subdirectory is custom_kernels, the directory created here
#  - a missing 'fi' nested patches 7 and 8 under the ssm_conv.cpp check
#  - the backup of ggml-sycl.cpp is taken before the file is modified
#  - the placeholder kernel source is a C++ comment rather than bare text
patch_llama_cpp() {
  log "🔷PATCH 1|8 GGML SYCL DOCTPHELPER REGESTRIERUNG BIBLIOTHEK KOPFZEILENEINTRAEGE"
  local SYCL_SRC_DIR="${LLAMA_CPP_DIR}/ggml/src/ggml-sycl"
  local DPCT_HELPER_FILE="${SYCL_SRC_DIR}/dpct/helper.hpp"
  local CMAKE_LISTS_FILE="${SYCL_SRC_DIR}/CMakeLists.txt"
  local CUSTOM_KERNEL_DIR="${SYCL_SRC_DIR}/custom_kernels"
  local CUSTOM_KERNEL_SRC="${CUSTOM_KERNEL_DIR}/ggml_flash_attention_sycl.cpp"
  local CUSTOM_KERNEL_CMAKE="${CUSTOM_KERNEL_DIR}/CMakeLists.txt"
  local GGML_SYCL_CPP="${SYCL_SRC_DIR}/ggml-sycl.cpp"
  local KERNEL_SOURCE_LOCAL="ggml_flash_attention_sycl.cpp"
  local rc=0
  # Set to 1 once CMakeLists.txt references the custom kernel directory;
  # patch 5 depends on the target defined there.
  local FA_SUBDIR_ACTIVE=0
  #1|9
  if [ ! -f "$DPCT_HELPER_FILE" ]; then
    warn "⚠️PATCH 1|9 DOCTPHELPER BIBLIOTHEK NICHT GEFUNDEN ABHAENIGKEITEN PRUEFEN"
    return 1
  fi
  log "🔷PATCH 1|9 DOCTPHELPER BIBLIOTHEK REGESTRIERTE KOPFZEILENEINTRAEGE LADEN"
  if sed -i 's|#include <sycl/ext/oneapi/math.hpp>|#include <sycl/ext/intel/math.hpp>|g' "$DPCT_HELPER_FILE"; then
    log "🔷PATCH 1|9 SCHREIBE DOCTPHELPER BIBLIOTHEK KOPFZEILENEINTRAEGE IN XAIGPUARC"
  else
    error "❌PATCH 1|9 DOCTPHELPER BIBLIOTHEK KOPFZEILENEINTRAEGE EINLADEN DER REGESTRIERUNG FEHLGESCHLAGEN"
    return 1
  fi
  # Patches 2b, 3 and 5 all edit this file, so check for it once up front.
  if [ ! -f "$CMAKE_LISTS_FILE" ]; then
    error "❌PATCH 3a|9 FEHLGESCHLAGEN CMAKE LISTS TXT SYCL GGML PFADE ABHAENGIGKEITEN GARUDA LINUX ARCH"
    return 1
  fi
  #2|9
  log "🔷PATCH 2|9 BAUE FLASH ATTENTION KERN"
  #2a|9
  if [ ! -d "$CUSTOM_KERNEL_DIR" ]; then
    mkdir -p "$CUSTOM_KERNEL_DIR"
    log "✅PATCH 2|9 ORNDER FUER FLASH ATTENTION KERN'${CUSTOM_KERNEL_DIR}'ANGELEGT"
  fi
  if [ -f "$KERNEL_SOURCE_LOCAL" ]; then
    cp "$KERNEL_SOURCE_LOCAL" "$CUSTOM_KERNEL_SRC"
    log "✅PATCH 2|9 ggml_flash_attention_sycl.cpp KERN'${KERNEL_SOURCE_LOCAL}' '${CUSTOM_KERNEL_SRC}'EINGEBAUT"
  fi
  if [ ! -f "$CUSTOM_KERNEL_SRC" ]; then
    # Placeholder written as a C++ comment so the file is valid source.
    printf '%s\n' "// ggml_flash_attention_sycl.cpp KERN VERZEICHNIS" > "$CUSTOM_KERNEL_SRC"
    warn "⚠️PATCH 2|9 LADEN DER KERNELDATEI'${KERNEL_SOURCE_LOCAL} FEHLGESCHLAGEN"
  fi
  # Single quotes keep the CMake variable references literal.
  printf '%s\n' \
    '' \
    'add_library(ggml_flash_attention_sycl OBJECT' \
    'ggml_flash_attention_sycl.cpp' \
    ')' \
    'target_include_directories(ggml_flash_attention_sycl PRIVATE ${GGML_SYCL_INCLUDE_DIRS})' \
    'target_compile_options(ggml_flash_attention_sycl PUBLIC ${GGML_SYCL_COMPILE_FLAGS})' \
    '' > "$CUSTOM_KERNEL_CMAKE"
  log "🔷PATCH 2a|9 CMAKE TXT LISTEN OBJEKTE KOPFZEILENEINTRAEGE EINFUEGEN"
  #2b|9
  local ADD_SUBDIR_LINE="add_subdirectory(custom_kernels)"
  local ADD_SUBDIR_MARKER="#add_subdirectory(dpct)"
  if grep -Fq -- "${ADD_SUBDIR_LINE}" "$CMAKE_LISTS_FILE"; then
    FA_SUBDIR_ACTIVE=1
    log "🔷PATCH 2b|9 FLASH ATTENTION KOPFZEILENEINTRAEGE BEREITS AKTIV UEBERSPRINGE"
  else
    rc=0
    _xai_insert_line "$CMAKE_LISTS_FILE" after "$ADD_SUBDIR_MARKER" "$ADD_SUBDIR_LINE" || rc=$?
    case "$rc" in
      0)
        FA_SUBDIR_ACTIVE=1
        log "✅PATCH 2b|9 ERFOLGREICH FLASH ATTENTION KOPFZEILENEINTRAEGE CMAKE TXT GESCHRIEBEN"
        ;;
      2)
        warn "⚠️PATCH 2b|9 MARKE '${ADD_SUBDIR_MARKER}' NICHT IN CMAKE TXT GEFUNDEN UEBERSPRINGE"
        ;;
      *)
        error "❌PATCH 2b|9 FLASH ATTENTION KOPFZEILENEINTRAEGE EINGLIEDERUNG CMAKE TXT FEHLGESCHLAGEN"
        return 1
        ;;
    esac
  fi
  #3|9
  log "🔷PATCH 3|9: CMAKE TEXT LISTEN MKL KOPZEILEN IC|P|X IMPLEMENTIERUNG VORBEREITEN"
  local MKL_INCLUDE_PATH="${MKL_ROOT}/include"
  local COMPILER_INCLUDE_PATH="${DPCPP_ROOT}/include"
  local DPCPP_LIB_INCLUDE_PATH="${DPCPP_ROOT}/lib/dpcpp/include"
  local ALL_INCLUDE_FLAGS="-I${MKL_INCLUDE_PATH} -I${COMPILER_INCLUDE_PATH} -I${DPCPP_LIB_INCLUDE_PATH}"
  local PATCH_LINE="target_compile_options (ggml-sycl PUBLIC ${ALL_INCLUDE_FLAGS})"
  local SEARCH_MARKER="#Add include directories for MKL headers"
  if grep -Fq -- "${COMPILER_INCLUDE_PATH}" "$CMAKE_LISTS_FILE"; then
    log "✅PATCH 3a|9 CMAKE LISTS TXT PFAD SYCL GGML BEREITS BENUTZT UEBERSPRINGE"
  else
    rc=0
    _xai_insert_line "$CMAKE_LISTS_FILE" after "$SEARCH_MARKER" "$PATCH_LINE" || rc=$?
    case "$rc" in
      0) log "✅PATCH 3|9 ICP|X CMAKET LISTS TXT MKL KOPFZEILENEINTRAEGE EINGEFUEGT" ;;
      2) warn "⚠️PATCH 3|9 MARKE '${SEARCH_MARKER}' NICHT IN CMAKE TXT GEFUNDEN UEBERSPRINGE" ;;
      *)
        error "❌PATCH 3|9 ICP|X CMAKE LISTS TXT MKL NICHT GEFUNDEN ABHAENGIKEITEN PRUEFEN"
        return 1
        ;;
    esac
  fi
  #4|9
  log "🔷PATCH 4|9 FLASH ATTENTION HAUPTKERN KOPFZEILENEINTRAEGE INJIZIEREN"
  if [ ! -f "$GGML_SYCL_CPP" ]; then
    error "❌PATCH 4b|9 FEHLGESCHLAGEN FLASH ATTENTION AWK PATCH KERN STOPP"
    return 1
  fi
  # Keep a copy of the file as it was before patches 4, 7 and 8 touch it.
  cp "$GGML_SYCL_CPP" "$GGML_SYCL_CPP.bak"
  #4a|9
  local FA_REGISTER_CODE=$'//REGESTRIERE ggml_flash_attention_sycl.cpp \nextern "C"\nvoid ggml_flash_attention_sycl(ggml_flash_attention_sycl * ctx, ggml_tensor *\ndst, const ggml_tensor * Q, const ggml_tensor * K, const ggml_tensor * V);\n'
  # NOTE(review): this anchor is taken over unchanged from the earlier version.
  # It is the declaration itself, so it cannot occur in a file that lacks the
  # symbol; until a real insertion point is chosen this step only warns.
  local FA_DECL_ANCHOR='extern "C" void ggml_flash_attention_sycl'
  if grep -Fq "ggml_flash_attention_sycl" "${GGML_SYCL_CPP}"; then
    log "🔷PATCH 4a|9 FLASH ATTENTION KOPFZEILENEINTRAEGE DEKLARATIONEN VORHANDEN FORTFAHREN"
  else
    rc=0
    _xai_insert_line "$GGML_SYCL_CPP" before "$FA_DECL_ANCHOR" "$FA_REGISTER_CODE" || rc=$?
    case "$rc" in
      0) log "✅PATCH 4a|9 AWK KOPFZEILENEINTRAEGE DEKLARATION EINGEFUEGT" ;;
      2) warn "⚠️PATCH 4a|9 EINFUEGEMARKE FUER DEKLARATION NICHT GEFUNDEN UEBERSPRINGE" ;;
      *)
        error "❌PATCH 4a|9 FEHLER EINFUEGEN FLASH ATTENTION KOPFZEILENEINTRAEGE DEKLARATION AWK FEHLT"
        return 1
        ;;
    esac
  fi
  local FA_DISPATCH_CASE=$' case GGML_OP_FLASH_ATTN:\n ggml_flash_attention_sycl(ctx, dst, src0, src1, src2);\n break;'
  if grep -Fq "case GGML_OP_FLASH_ATTN:" "${GGML_SYCL_CPP}"; then
    log "✅PATCH 4a|9 AWK UNTERBAU FLASH ATTENTION KOPFZEILENEINTRAEGE INJEKTION VORHANDEN FORTFAHREN"
  else
    log "🔷PATCH 4a|9 FUEGE ZWISCHENSPEICHER PER AWK KOPFZEILE EIN"
    rc=0
    _xai_insert_line "$GGML_SYCL_CPP" before "case GGML_OP_MUL_MAT_Q_K:" "$FA_DISPATCH_CASE" || rc=$?
    case "$rc" in
      0) log "✅PATCH 4a|9 AWK UNTERBAU KOPFZEILENEINTRAEGE EINGEFUEHRT" ;;
      2) warn "⚠️PATCH 4a|9 EINFUEGEMARKE 'case GGML_OP_MUL_MAT_Q_K:' NICHT GEFUNDEN UEBERSPRINGE" ;;
      # As before, a failure of this step is reported but not fatal.
      *) error "❌PATCH 4a|9 FEHLER EINFUEGEN AWK KOPFZEILENEINTRAEGE" ;;
    esac
  fi
  log "🔷PATCH 4b|9 ERFOLGREICH FLASH ATTENTION AKW UNTERBAU GELADEN"
  #5|9
  log "🔷PATCH 5/9 FLASH ATTENTION OBJEKT INJIZIERE VARIABLEN UNTERBLOCK SYCL BIBLIOTHEKEN"
  if [ "$FA_SUBDIR_ACTIVE" -ne 1 ]; then
    # Without the subdirectory the object target does not exist; referencing
    # it from CMakeLists.txt would name an undefined target.
    warn "⚠️PATCH 5|9 FLASH ATTENTION UNTERORDNER NICHT REGISTRIERT UEBERSPRINGE OBJEKT INJEKTION"
  else
    #5a|9
    local VAR_LINE='set(FA_OBJECT_FILES "$<TARGET_OBJECTS:ggml_flash_attention_sycl>")'
    local VAR_SEARCH_MARKER="set(GGML_SYCL_SOURCES"
    if grep -Fq "FA_OBJECT_FILES" "$CMAKE_LISTS_FILE"; then
      log "🔷PATCH 5a|9 FLASH ATTENTION OBJEKT VARIABLEN VORHANDEN UEBERSPRINGE"
    else
      rc=0
      _xai_insert_line "$CMAKE_LISTS_FILE" after "$VAR_SEARCH_MARKER" "$VAR_LINE" || rc=$?
      case "$rc" in
        0) log "✅PATCH 5a|9 FLASH ATTENTION OBJEKT VARIABLE ERFOLGREICH DEFINIERT WEITER" ;;
        2) warn "⚠️PATCH 5a|9 MARKE '${VAR_SEARCH_MARKER}' NICHT IN CMAKE TXT GEFUNDEN UEBERSPRINGE" ;;
        *)
          error "❌PATCH 5a|9 FLASH ATTENTION OBJEKT REGESTRIERUNG FEHLGESCHLAGEN STOPP"
          return 1
          ;;
      esac
    fi
    #5b|9
    local TARGET_SEARCH_MARKER='target_sources(ggml-sycl PRIVATE ${GGML_SYCL_SOURCES})'
    local NEW_TARGET_SOURCES_LINE='target_sources(ggml-sycl PRIVATE ${GGML_SYCL_SOURCES} ${FA_OBJECT_FILES})'
    if grep -Fq -- "${TARGET_SEARCH_MARKER}" "$CMAKE_LISTS_FILE" \
      && ! grep -Fq -- '${FA_OBJECT_FILES}' "$CMAKE_LISTS_FILE"; then
      if _xai_replace_literal "$CMAKE_LISTS_FILE" "$TARGET_SEARCH_MARKER" "$NEW_TARGET_SOURCES_LINE"; then
        log "✅PATCH 5b|9 ERFOLGREICHE GGML SYCL INJEKTIONEN"
      else
        error "❌PATCH 5b|9 GGML SYCL INJEKTION FEHLGESCHLAGEN"
        return 1
      fi
    else
      log "✅PATCH 5b|9 GGML SYCL AKTIV INJECTION UEBERSPRUNGEN"
    fi
  fi
  #6|9
  log "🔷PATCH 6|9: SSMCONV CPP WARNUNG BEHEBEN VORZEICHEN VERGLEICH AKW PATCH"
  local SSM_CONV_FILE="${LLAMA_CPP_DIR}/ggml/src/ggml-sycl/ssm_conv.cpp"
  local SEARCH_LINE='GGML_ASSERT(src0->nb[1] == src0->ne[0] * static_cast<int64_t>(sizeof(float)));'
  local REPLACE_LINE='GGML_ASSERT(src0->nb[1] == (size_t)(src0->ne[0] * sizeof(float)));'
  if [ -f "$SSM_CONV_FILE" ]; then
    #6|9a
    if grep -Fq -- "${SEARCH_LINE}" "$SSM_CONV_FILE"; then
      log "🔷PATCH 6a|9 SUCHE ssm_conv.cpp NATIVE ADRESSIERUNG"
      if _xai_replace_literal "$SSM_CONV_FILE" "$SEARCH_LINE" "$REPLACE_LINE"; then
        log "✅PATCH 6|9 SSMCONV CPP ERFOLGREICH GESCHRIEBEN"
      else
        error "❌PATCH 6|9 SSMCONV CPP FEHLGESCHLAGEN"
        return 1
      fi
    else
      warn "⚠️PATCH 6|9 SSMCONV CPP ZEILE NICHT GEFUNDEN UEBERSPRINGE"
    fi
  fi
  #7|9
  # This size can be adjusted by the user.
  log "🔷PATCH 7|9: ERZWINGE MAXIMALE BLOCK GROESSE 256 FUER ARC ALCHEMIST"
  if grep -q "GGML_SYCL_MAX_BLOCK_SIZE 256" "$GGML_SYCL_CPP"; then
    log "🔷PATCH 7|9 MAXIMALE BLOCK GROESSE 256 ZWANG FUER ARC BEREITS DEFINIERT"
  else
    sed -i 's/#define GGML_SYCL_MAX_BLOCK_SIZE [0-9]*/#define GGML_SYCL_MAX_BLOCK_SIZE 256/g' "$GGML_SYCL_CPP"
    success "✅PATCH 7|9 ERZWINGE MAXIMALE BLOCK GROESSE 256 FUER ARC ALCHEMIST AKTIVIERT"
    log "✅PATCH 7|9 MAXIMALE BLOCK GROESSE 256 ZWANG ERFOLGREICH INJIZIERT"
  fi
  #8|9
  log "🔷PATCH 8|9 SYCL QUEUE ORDNER: HOME/XAIGPUARC OPTIMIERVORGANG KOPFZEILENEINTRAEGE"
  local SYCL_FILE="$GGML_SYCL_CPP"
  local IN_ORDER_PROPS='sycl::property_list prop_list{sycl::property::queue::in_order{}}'
  if grep -Fq -- "$IN_ORDER_PROPS" "$SYCL_FILE"; then
    # Already rewritten; running the substitution again would append a '}'.
    :
  elif grep -q "sycl::property_list" "$SYCL_FILE"; then
    sed -i 's|sycl::property_list prop_list{[^}]*}|sycl::property_list prop_list{sycl::property::queue::in_order{}}|' "$SYCL_FILE"
  else
    sed -i '/sycl::queue q{/i\ sycl::property_list prop_list{sycl::property::queue::in_order{}};' "$SYCL_FILE"
  fi
  #8a|9
  if grep -q "sycl::queue q{" "$SYCL_FILE"; then
    sed -i 's|sycl::queue q{[^}]*}|sycl::queue q{dev, ctx, prop_list}|g' "$SYCL_FILE"
    if grep -q "in_order" "$SYCL_FILE"; then
      success "✅PATCH 8|9 SYCL QUEUE ERFOLGREICH KOPFZEILENEINTRAEGE OPTIMIERT"
    else
      error "❌PATCH 8|9 SYCL QUEUE AENDERUNGEN KOPFZEILENEINTRAEGE NICHT FERTIG"
      return 1
    fi
  else
    warn "⚠️PATCH 8|9 SYCL QUEUE KOPFZEILENEINTRAEGE NICHT GEFUNDEN UEBERSPRINGE"
  fi
  success "🔷ALLE FUENF VON ACHT EINGLIEDERUNGEN INTEL XE/ARC/iGPU/dGPU/eGPU FUER XAIGPUARC ERFOLGREICH"
  patch_gcc16_t5 || return 1
}
#9|9
#GCC16 T5.CPP FORWARD DECLARATION BEFORE INSTANTIATION
# Insert forward declarations in front of the definition of
# llama_model_t5::build_arch_graph in src/models/t5.cpp.
# Globals read: LLAMA_CPP_DIR.
# Returns 0 when the file is absent, already carries the marker
# XAIGPUARC_GCC16_PATCH, or was patched; 1 when the marker is missing afterwards.
# Changed: the intermediate copy is created with mktemp instead of the fixed
# name /tmp/t5_patched.cpp, and the result is written back into the existing
# file rather than moved over it.
patch_gcc16_t5() {
  log "🔷PATCH 9|9 GCC16 T5.CPP VORWAERTSDEKLARATION VOR INSTANTIIERUNG"
  local T5_FILE="${LLAMA_CPP_DIR}/src/models/t5.cpp"
  local tmp=""
  if [ ! -f "$T5_FILE" ]; then
    warn "⚠️PATCH 9|9 T5.CPP NICHT GEFUNDEN UEBERSPRINGE"
    return 0
  fi
  if grep -q "XAIGPUARC_GCC16_PATCH" "$T5_FILE"; then
    log "🔷PATCH 9|9 GCC16 VORWAERTSDEKLARATION BEREITS AKTIV UEBERSPRINGE"
    return 0
  fi
  if tmp=$(mktemp); then
    # NOTE(review): the same declaration is printed twice, exactly as before;
    # confirm whether two different specialisations were intended.
    if awk '/^std::unique_ptr<llm_graph_context> llama_model_t5::build_arch_graph/ { print ""; print "// XAIGPUARC_GCC16_PATCH: Vorwaertsdeklaration vor Instantiierung"; print "template <> llama_model_t5::graph::graph(const llama_model & model, const llm_graph_params & params);"; print "template <> llama_model_t5::graph::graph(const llama_model & model, const llm_graph_params & params);"; } { print }' "$T5_FILE" > "$tmp"; then
      cat "$tmp" > "$T5_FILE" || true
    fi
    rm -f -- "$tmp"
  fi
  # The marker check decides success, whatever happened above.
  if grep -q "XAIGPUARC_GCC16_PATCH" "$T5_FILE"; then
    success "✅PATCH 9|9 GCC16 T5.CPP VORWAERTSDEKLARATION ERFOLGREICH INJIZIERT"
  else
    error "❌PATCH 9|9 GCC16 T5.CPP VORWAERTSDEKLARATION FEHLGESCHLAGEN"
    return 1
  fi
}
#3|XAIGPUARC|BUILD CONFIGURATION|
# Run the CMake configure step for llama.cpp inside BUILD_DIR.
# $1 - value passed as GGML_SYCL_F16 (default 1)
# Globals read: BUILD_DIR, LLAMA_CPP_DIR, CMAKE_BUILD_TYPE, FP_FLAG.
# Returns 0 on success, 1 when BUILD_DIR cannot be created or entered or when
# cmake fails.
# Fixed: the options used to stand on separate lines without continuation, so
# cmake received only the source directory and each option ran as a command
# of its own. They are now passed as one argument array. The exit status is
# captured explicitly so that errexit cannot skip popd and the error message.
configure_build() {
  log "🔷BEREITE XAIGPUARC KOPFZEILEN BAUVORGANG VOR"
  local FP_MODE="${1:-1}"
  if [ ! -d "${BUILD_DIR}" ]; then
    log "🔷LEGE XAIGPUARC IN ORDNER HOME ${BUILD_DIR}"
    mkdir -p "${BUILD_DIR}" || { error "❌KONNTE ORDNER XAIGPUARC'${BUILD_DIR}'NICHT ANLEGEN"; return 1; }
  fi
  if ! pushd "${BUILD_DIR}" > /dev/null; then
    error "❌KONNTE NICHT NACH HOME/XAIGPUARC WECHSELN'${BUILD_DIR}'COMPUTER NUTZER BERECHTIGUNG PRUEFEN"
    return 1
  fi
  log "🔷STARTE CMAKE TXT KOPFZEILENBAU XAIGPUARC ${FP_FLAG:-${FP_MODE}}"
  local -a CMAKE_ARGS=(
    "../${LLAMA_CPP_DIR}"
    -G "Unix Makefiles"
    -DCMAKE_EXPORT_COMPILE_COMMANDS=ON
    -DCMAKE_BUILD_TYPE="${CMAKE_BUILD_TYPE}"
    -DGGML_SYCL=ON
    -DGGML_SYCL_TARGET=INTEL
    -DGGML_SYCL_CCACHE=ON
    -DGGML_SYCL_F16="${FP_MODE}"
    -DGGML_SYCL_FLASH_ATTN=ON
    -DGGML_SYCL_MKL_SYCL_BATCH_GEMM=1
    -DCMAKE_C_COMPILER=icx
    -DCMAKE_CXX_COMPILER=icpx
    -DCMAKE_CXX_STANDARD=23
    -DCMAKE_CXX_STANDARD_REQUIRED=ON
    -DCMAKE_CXX_EXTENSIONS=ON
    -DGGML_SYCL_PRIORITIZE_DMMV=ON
    -DGGML_SYCL_DISABLE_DNN=OFF
    -DTHREADS_PREFER_PTHREAD_FLAG=ON
    -DCMAKE_C_FLAGS="-O3 -ffast-math"
    -DGGML_SYCL_DISABLE_GRAPH=OFF
    -DCMAKE_THREAD_LIBS_INIT="-pthread"
    -DCMAKE_C_COMPILER_WORKS=1
  )
  local CMAKE_STATUS=0
  cmake "${CMAKE_ARGS[@]}" || CMAKE_STATUS=$?
  popd > /dev/null
  if [ "${CMAKE_STATUS}" -ne 0 ]; then
    error "❌CMAKE TXT KOPFZEILEN SCHREIBEN FEHLGESCHLAGEN"
    return 1
  fi
  success "✅VORBAU ABGESCHLOSSEN"
}
#4 COMPILE
# Build the targets llama-cli and llama-ls-sycl-device inside BUILD_DIR and
# write the complete build output to BUILD_DIR/build.log.
# Globals read: BUILD_DIR, CMAKE_BUILD_TYPE, NPROC.
# Returns 0 on success, 1 when BUILD_DIR cannot be entered or the build fails.
# Fixed: the build status is captured with '|| BUILD_STATUS=$?'; under errexit
# a failing build previously ended the script before popd and before the
# message naming the log file.
compile_project() {
  log "🔷BAUE XAIGPUARC GRUND GERUEST STRUKTUR"
  local LOG_FILE="build.log"
  log "🔷KOPFZEILEN AUSGABE UNTERORNDER GESPEICHERT"
  log "🔷BAUVORGANG LAEUFT XAIGPUARC SYCL C++ LEVEL ZERO KOPFZEILEN SCHREIBEN ABGESCHLOSSEN"
  if ! pushd "${BUILD_DIR}" > /dev/null; then
    error "❌XAIGPUARC '${BUILD_DIR}' WEGEN FEHLERHAFTEM WECHSEL KOPZEILENBAU NICHT MOEGLICH"
    return 1
  fi
  log "🔷
|🟢|XAIGPUARC
|🔧|INSTALLATION auf ihrem Endgeraet in 🧰 .../HOME/USERNAME/XAIGPUARC/...|✅|
|📌|Zur DEInstallation einfach entsprechende ORDNER LOESCHEN!
|💡|min. 9GiB - 17GiB RAM DATENVORGAENGE 🔄
|🧱|VORBERRECHUNG SPRACHPROGRAMM ASSISTENT FUER ⚫ MATHEMATIK 🔧
|🟡|ACHTUNG! Wenn Sie DAS Hier Lesen Koennen...! :-)
|🔄|WIRD-es..
|📌|ERSTMALIG Mindestens-...
|💡|3 bis 7 Minuten-...
|💡|ANDAUERN!!!...
|⚙️|BITTE ETWAS GEDULD...
|✅||Beim zweitem Start mit demselbem Vorgang wie diesem, dauert es nur wenige Sekunden bis die KI startet!
|⚫|DUNKLE-MATHEMATIK 🧰 🔄 🎁 🔄 🔧 🔄 🎯 DEUTSCH-SPRACHPROGRAMM
|💡|NUTZEN SIE DEN MATH-TUTOR_F16
|🟡|A770LE 16GiB VRAM @ 14.2GiB@MathTutor-f16 MAXIMAL MATHEMATIK SPRUCH
|👉|DANKE FUER DIE NUTZUNG VON ❌AIGPUARC"
  local BUILD_STATUS=0
  cmake --build . --verbose --config "${CMAKE_BUILD_TYPE}" -j "${NPROC}" \
    --target llama-cli llama-ls-sycl-device > "${LOG_FILE}" 2>&1 || BUILD_STATUS=$?
  popd > /dev/null
  if [ "${BUILD_STATUS}" -ne 0 ]; then
    error "❌XAIGPUARC KOPFZEILEN FEHLGESCHLAGEN UEBERPRUEFEN${BUILD_DIR}/${LOG_FILE}"
    return 1
  fi
  success "✅BAU XAIGPUARC ERFOLGREICH"
}
#5 AUTOMATIC DEVICE SELECTION
# Choose the SYCL device from the output of llama-ls-sycl-device and derive
# the number of layers to offload.
# Globals read:    BUILD_DIR, LS_SYCL_DEVICE_PATH.
# Globals written: ONEAPI_DEVICE_SELECTOR (exported), DEVICE, N_GPU_LAYERS,
#                  VRAM_GIB.
# Preference order: first line containing "Intel Arc", then the first line
# matching Iris|Xe|Graphics, otherwise the selector opencl:cpu.
# Fixes compared with the previous version:
#  - TARGET_ID was read before it was ever set (fatal under 'set -u')
#  - one fallback selector had message text appended; it is now level_zero:0
#  - grep finding nothing no longer aborts the script through pipefail
#  - the memory figure is validated before arithmetic is done with it
#  - a model name ending in M (e.g. A730M) is no longer read as memory size
auto_select_device() {
  log "🔷SUCHE NACH VERFUEGBAREN SYCL GERAETEN AUF IHREM SYSTEM"
  local FULL_LS_PATH="./${BUILD_DIR}/${LS_SYCL_DEVICE_PATH}"
  if [ ! -x "${FULL_LS_PATH}" ]; then
    warn "⚠️SYCL UNTERBAU NICHT GEFUNDEN ${FULL_LS_PATH} ZWEITSUCHE ODER RUECKFALL AUF CPU"
    export ONEAPI_DEVICE_SELECTOR="level_zero:0"
    DEVICE="ARC"
    return
  fi
  local DEVICES=""
  DEVICES=$("${FULL_LS_PATH}") || DEVICES=""
  if [ -z "$DEVICES" ]; then
    warn "⚠️KEINE KOMPATIBLEN SYCL GERAETE GEFUNDEN! SUCHE ERNEUT UND UEBERGEHE iGPU VOR dGPU NUTZUNG MIT dGPU"
    export ONEAPI_DEVICE_SELECTOR="level_zero:0"
    DEVICE="ARC"
    N_GPU_LAYERS=99
    return
  fi
  # '|| true' keeps a no-match result from counting as a failure.
  local ARC_LINE="" IGPU_LINE="" TARGET_LINE=""
  ARC_LINE=$(grep -i -m1 "Intel Arc" <<< "$DEVICES" || true)
  IGPU_LINE=$(grep -Ei -m1 "Iris|Xe|Graphics" <<< "$DEVICES" || true)
  if [ -n "$ARC_LINE" ]; then
    TARGET_LINE="$ARC_LINE"
    DEVICE="ARC"
  elif [ -n "$IGPU_LINE" ]; then
    TARGET_LINE="$IGPU_LINE"
    DEVICE="iGPU"
  else
    export ONEAPI_DEVICE_SELECTOR="opencl:cpu"
    DEVICE="CPU"
    N_GPU_LAYERS=99
    error "❌KEINE GEEIGNETE GRAFIKKARTE GEFUNDEN FALLE ENDGUELTIG AUF CPU ZURUECK"
    return
  fi
  # Device id: the first whitespace-separated field when it is a plain number,
  # otherwise the first integer on the line. Assumption: table-style output
  # such as "| 0| [level_zero:gpu:0]| ..." carries the id as its first number.
  local TARGET_ID=0
  local first_field_re='^[[:space:]]*([0-9]+)([[:space:]]|$)'
  local any_int_re='([0-9]+)'
  if [[ "$TARGET_LINE" =~ $first_field_re ]]; then
    TARGET_ID="${BASH_REMATCH[1]}"
  elif [[ "$TARGET_LINE" =~ $any_int_re ]]; then
    TARGET_ID="${BASH_REMATCH[1]}"
  fi
  export ONEAPI_DEVICE_SELECTOR="level_zero:${TARGET_ID}"
  log "🔷NUTZE INTEL XE/ARC GRAFIKKARTE ${DEVICE}(Device ${TARGET_ID})"
  # Memory in MiB: a number directly followed by 'M' that stands on its own;
  # 1024 is used when the line carries no such figure.
  local VRAM_MIB_RAW=1024
  local vram_re='(^|[^[:alnum:]])([0-9]+)M([^[:alnum:]]|$)'
  if [[ "$TARGET_LINE" =~ $vram_re ]]; then
    VRAM_MIB_RAW="${BASH_REMATCH[2]}"
  fi
  VRAM_GIB=$((10#${VRAM_MIB_RAW} / 1024)) # MiB to GiB, rounded down
  if [ "${VRAM_GIB}" -lt 1 ]; then
    VRAM_GIB=1
  fi
  local LAYER_SIZE_MIB=256
  local VRAM_MIB_CALC=$((VRAM_GIB * 1024))
  # 99 percent of the rounded memory divided by the per-layer budget,
  # clamped to the range 1..99.
  N_GPU_LAYERS=$((VRAM_MIB_CALC * 99 / 100 / LAYER_SIZE_MIB))
  if [ "$N_GPU_LAYERS" -gt 99 ]; then
    N_GPU_LAYERS=99
  fi
  if [ "$N_GPU_LAYERS" -lt 1 ]; then
    N_GPU_LAYERS=1
  fi
  log "🔷NGL${N_GPU_LAYERS}SCHICHTEN MODELL PRIORITAET GLEICHMAESSIG AUF GPU/CPU VERTEILT"
}
#6 LIST SYCL-CAPABLE DEVICES
# Run the llama-ls-sycl-device tool from BUILD_DIR so that its output is
# shown; print a warning instead when the tool file does not exist.
list_sycl_devices() {
  log "🔷SUCHE ZWEIMAL HINTEREINANDER SYCL FAEHIGES GERAET UM SUCHFEHLER ZU UMGEHEN"
  local lister="./${BUILD_DIR}/${LS_SYCL_DEVICE_PATH}"
  if [ ! -f "${lister}" ]; then
    warn "⚠️AKTUELL KEIN SYCL GERAET GEFUNDEN ${lister} VERSUCH ZWEI SUCHE..."
    return
  fi
  "${lister}"
}
#CHANGE THE MODEL HERE BELOW!!!!
#Aendern Sie Ihr Modell hier unten!!!
#CHANGE THE MODEL HERE BELOW!!!!
#Aendern Sie Ihr Modell hier unten!!!
#CalderaAI_Hexoteric-7B-F16.f16Yi-6B-200K-Llama-sharded.f16
#MathCoder2-DeepSeekMath-7B-f16Neumind-Math-7B-Instruct.F16Lucy-1.7B-F16PULI-LlumiX-32K-instruct-f16.f16
#7 CHOOSE MODEL PATH
# Set and export MODEL_PATH and make sure the directory "models" exists.
# $1 - path of the model file; the default below is used when empty or unset
# Prints a warning when the file does not exist; this is not an error.
# Fixed: the warning referenced $MODEL_PATHKOPIEREN, a variable that does not
# exist (fatal under 'set -u'); the path is now delimited with braces.
prepare_model() {
  MODEL_PATH=${1:-"models/Orchestrator-8B-f16_q8_0.gguf"}
  mkdir -p models
  if [ ! -f "$MODEL_PATH" ]; then
    warn "⚠️IHR MODELL NICHT UNTER HOME/IHRNAME/MODELS GEFUNDEN! BITTE DORT HIN ${MODEL_PATH} KOPIEREN"
  fi
  export MODEL_PATH
}
#8 RUN MODEL: CalderaAI_Hexoteric-7B-F16.f16 Neumind-Math-7B-Instruct. Lucy-1.7B-F16 sauerkrautlm-7b-v1.Q8_0
# Start llama-cli from BUILD_DIR with the chosen model and prompt.
# $1 - model path (default: DEFAULT_MODEL_PATH below)
# $2 - prompt text (default: the built-in prompt below)
# Globals read: BUILD_DIR, LLAMA_CLI_PATH, ONEAPI_DEVICE_SELECTOR,
#               N_GPU_LAYERS, DEVICE.
# Returns 1 when llama-cli is not executable, otherwise llama-cli's status.
# Fixes compared with the previous version:
#  - arguments were read from $2/$3 although the caller passes model and
#    prompt as $1/$2
#  - a prose line starting with "PROMPT:" was executed as a command
#  - the llama-cli options stood on separate lines without continuation and
#    were never passed to the program; they are now one argument array
#  - N_GPU_LAYERS is optional and falls back to 99
#  - --main-gpu receives a number; selectors such as "level_zero:*" map to 0
run_inference() {
  local DEFAULT_MODEL_PATH="models/Orchestrator-8B-f16_q8_0.gguf"
  #CHANGE THE MODEL HERE ABOVE, TWICE! (also in prepare_model)
  #MathTutor-7B-H_v0.0.1.f16PULI-LlumiX-32K-instruct-f16.Lucy-1.7B-F16MiniCPM4.1-8B-f16_q8_0gpt-oss-20b-F16
  #kani-tts-400m-en-f16_q8_0.gguf
  #PROMPT: change the prompt below to suit your own wishes!
  local MODEL_PATH_ARG=${1:-$DEFAULT_MODEL_PATH}
  local PROMPT_ARG=${2:-"
Pause
|0>SYSTEM<0|TRY TO DEVELOPE 2D-PAPERWORK-A4-DIN-7Dimensional-SYCL-STANDALONE-Kernel-Lettersize-12- Formula= cij=∑aik⋅bkj-|
Pause
|EXECUTE_ONLY_FULL_OPEN_SOURCE_CODE|DISABLE_DISCOURSE_MODE|DISABLE_NATURAL_LANGUAGE_INTRO|
Pause
|TEST001|RE-START////
Example Formula 1-10_Sentences:
/Language 1 ENG /
#|PCxTCxSWxAI|#(Probability_Calculation)X(Time_Chain)X(Skynet_Work)X(Artifactial_Inference)|
/Language 2 GER //
#|PBxZKxHWxSM|#(Probabilistisch[e]Berechnung[e(n)])X(Zeit[K]ette[n])X(Himmelsnetz[W]erk[e])X(Kuenstliche_Inferenz)|
/Language 3 LAT ///
#|PCxTCxCRCxIA|#(Probabilitatum_Computatio)X(Temporis_Catena)X(Contextus_Reteque_Caeli)X(Inferentia_Artificialis)|
|TEST|000|END\|
|00|INSTRUCTION=|
\
|FORMAT=->FORMULA_FINDER+2D-PAPER-DIN-A4-SYCL_KERNEL_GENERATOR /
|Mode=->Matrix-Multiplikation=cij=∑k=1naik⋅bkj /
(Cmatch->,Condition->,Completeness->,S =
(If = CONDITION_match = COMPLETE_SET_ad of MATH_PHYSIC_LOGIC) =
(,S1 (,C++,icx,icpx,sycl_ext)),+S2 (,16x16matrizen logic =
(welcome but Vectorized-32-bit-Logic-Topic,))),+S3
(,sycl-ggml,OFFLOAD,)))),+S4 (,FP16_32-bit_ALU_OP16x16;))))) =
(Oppression,-> Meta,-> ->>suppress_meta_comments->>>>
(output_all_sections_add Add_Only_Pure_Logic_Sections)))))) =
(discret>Solutionroom_get = PRINT_ALL_SECTIONS_add))))))) =
(FP16_32-bit_math.cl; = Fi;))))))));\
|00|After|Promt|Analysis|You|Recive|Input|Text|Questions-Build up-on-your own-answer!
\
|01|00-42|MAIN-/TASKS/|
|1.|Word|2D-Format-A4-DIN-PROGRAMM-short|Basic-C++|math-analysis-code|32-bit|vector-intrinsics|graph
|2.|Identify|ambiguities-missIng-information-assumptions-input
|3.|Produce|clear-FULL|Info-Tech|math|c++|icpx|icx|sycl|Code|Vector|32-bit
|4.|If|Multiple-Valid-Answers|Solutions-Exist|then-LiSt|Briefly|Print-Preference
|5.|Logic-Chain-Activation||Rank|Optimization-Paths|Latency-vs-Throughput|
\
|02|CONSTRAINTS/|
|Do||Limit|response|to|maximum|10-SENTENCES!!|Strict|one|sentence|preferred!!|
|Do|not|elaborate|
|Do|not|explain|reasoning
|Do|not|invent|missing|details
|Plain|neutral|piCtured|language
|Keep|total|response|conciSe|structured
|Do|not|include|meta|commentary
\
|03|OUTPUT|FORMAT-2D-A4-PAPER/||MODE=EXECUTE|OUTPUT=SECTIONS|NO_PARAPHRASE|NO_EXAMPLE|/
|Section1|Restatement
|Section2|Ambiguities|Missing|Information
|Section3|Minimal|ANswer|Exception:EXCEPTION|LIMIT=1-3_SENTENCES|
|Section4|Possible|Alternativ
#1.|Word|Short|PROOF-OF-ANSWER/LIMIT=1-10_SENTENCES
#2.|IdEnTiFy|cij = ∑k=1naik⋅bkj; SYCL 16x16 Matrix Multiplication Kernel; FP16 32-bit precision.
#3.|KEY WORDS:|XMX SYCL_COMPILER_HINT|icpx -fsycl -O3 Float@TARGET=SYCL|VECTOR|32BIT|16X16|
#4.|If>Multiple>Valid>Solutions>MAX_THREE_Exist>>Then>List>Print>>>Preference
#5.|List|briefly|print|Precision-FP16@32-bit|Aligment-Zero-Copy-Focus
\
|04|Beginn|Processing/|EXIT-without-REPEATING!!!
|Add|Section1:Restatement
|And|Section2:Ambiguities-Missing-Info
|And|Section3:Minimal PROOF-of-AnSwer
|And|Section4:Possible-AlternatiVe
|Sol|SUPPRESS_META_COMMENTARY
|Set|ATTRIBUTE_MINIMAL_CONCISE
|CONTROL|IGNORING_EVERYTHING_ELSE
|And|
|EXECUTE|PRINT-2D-A4-DIN-PaperBLOCKStyle-_-ALL_SECTIONS|OUTPUT=SINGLE_SENTENCE|MAX-TEN-SENTENCES|
|TERMINATE
|Fi|
|MAIN|0-5|ENDE
"
}
  # Numeric device index taken from "<backend>:<number>"; anything else
  # (for example "level_zero:*" or "opencl:cpu") falls back to 0.
  local GPU_ID=0
  local selector_re='^[^:]*:([0-9]+)'
  if [[ "${ONEAPI_DEVICE_SELECTOR:-}" =~ $selector_re ]]; then
    GPU_ID="${BASH_REMATCH[1]}"
  fi
  local NGL_SET=${N_GPU_LAYERS:-99}
  local FULL_LLAMA_CLI_PATH="./${BUILD_DIR}/${LLAMA_CLI_PATH}"
  #Lower the two values below when memory errors are reported.
  local CONTEXT_SIZE=8096
  #VALUES TO TRY: 512 1024 2048
  #Standard:4096,0x1000
  #Recommended:8192,0x2000 MathTutor:16384,0x4000-20480,0x5000|
  #Critical:24576|0x6000 32768|0x8000|65536|131072|20480|262144|524288|
  local PREDICT_TOKENS=8096
  log "🔷STARTE KI ANTWORT MIT PARAMETER${DEVICE}(ID: ${GPU_ID})NGL WERT GLEICH${NGL_SET}${FULL_LLAMA_CLI_PATH}"
  if [ ! -x "${FULL_LLAMA_CLI_PATH}" ]; then
    error "❌FEHLER AKTUELLER UNTERBAU NEUBAU FEHLGESCHLAGEN${FULL_LLAMA_CLI_PATH}"
    return 1
  fi
  local -a CLI_ARGS=(
    -no-cnv
    --n-cpu-moe 35
    --no-mmap
    --mlock
    --cache-type-k f16
    --cache-type-v f16
    --model "${MODEL_PATH_ARG}"
    --prompt "${PROMPT_ARG}"
    --n-predict "${PREDICT_TOKENS}"
    --ctx-size "${CONTEXT_SIZE}"
    -ngl "${NGL_SET}"
    --keep 8096
    --main-gpu "${GPU_ID}"
    --color auto
  )
  local RUN_STATUS=0
  # ZES_ENABLE_SYSMAN=1 applies to this one command only.
  ZES_ENABLE_SYSMAN=1 "${FULL_LLAMA_CLI_PATH}" "${CLI_ARGS[@]}" || RUN_STATUS=$?
  if [ "${RUN_STATUS}" -ne 0 ]; then
    error "❌FEHLER SPRACHMODELL BEENDET MIT STATUS ${RUN_STATUS}"
    return "${RUN_STATUS}"
  fi
  echo "✅SPRACHMODELL INTERAKTIONS FUNKTION JETZT AKTIV"
}
#DEFINITION OF THE MAIN FUNCTION
# Run the whole workflow: environment, optional build, device selection,
# model preparation and inference.
# $1 - value handed to configure_build as GGML_SYCL_F16 (default 1)
# $2 - model path, forwarded to prepare_model and run_inference
# $3 - prompt, forwarded to run_inference
# Fixed: when both tool binaries already exist the function announced that
# the build is skipped but still ran configure_build and compile_project.
# Both steps now run only when a build is actually needed. Without internet
# the fetch and patch steps are skipped and the local sources are built.
main() {
  local FP_MODE="${1:-1}"
  local RERUN_BUILD=1
  prepare_environment
  #01
  local FULL_LLAMA_CLI_PATH="./${BUILD_DIR}/${LLAMA_CLI_PATH}"
  local FULL_LS_PATH="./${BUILD_DIR}/${LS_SYCL_DEVICE_PATH}"
  if [[ -f "${FULL_LLAMA_CLI_PATH}" ]] && [[ -f "${FULL_LS_PATH}" ]]; then
    success "✅ XAIGPUARC WERKZEUGK09ASTEN VORHANDEN NEUBAU UNNOETIG${FULL_LLAMA_CLI_PATH} ${FULL_LS_PATH}"
    log "🔷UEBERSPRINGE BAUVORGANG WERKZEUGKASTEN"
    RERUN_BUILD=0
  else
    warn "⚠️KEIN AKTUELLES XAIGPUARC GEFUNDEN WIRD NEU GEBAUT"
    RERUN_BUILD=1
  fi
  if [[ "$RERUN_BUILD" -eq 1 ]]; then
    log "🔷STARTE ERSTMALIGEN BAUVORGANG XAIGPUARC"
    if check_internet; then
      log "🔷LADE WERKZEUGKASTEN"
      #0
      setup_project
      #1
      patch_llama_cpp
      #2
    else
      warn "⚠️INTERNET NICHT VERFUEGBAR UEBERSPRINGE AUFWERTUNG WERKZEUGKASTEN LOKALE VERSION"
    fi
    configure_build "${FP_MODE}"
    #3
    compile_project
    #4
  fi
  auto_select_device
  #5
  list_sycl_devices
  #6
  prepare_model "${2:-}"
  #7
  run_inference "${2:-}" "${3:-}"
  #8
  log "✅NUTZUNG VON XAIGPUARC JETZT MOEGLICH /IHRE FRAGE NACH > ... DRUECKEN SIE |ENTER${BUILD_DIR}/${LLAMA_CLI_PATH}"
}
#MAIN ENTRY POINT
# Call main with the first three command-line arguments: the first defaults
# to 1, the second and third default to the empty string.
main "${1:-1}" "${2:-}" "${3:-}"
#42
# After main returns, print a message that contains the value of LOG_FILE.
log "✅KOMPLETTER BAUVORGANG HIER GESPEICHERT${LOG_FILE}"
#ENDE DES PROGRAMMS / END OF APPLICATION
##--##--##--##--##--##--##--##--##
###--TEST AND EXAMPLE HEAVEN--###
#Xe/i915
##--##--##--##--##--##--##--##--##
#XAIGPUARC|Hardware|Build|Test
#Intel|ARC|Alchemist|Battlemage|Celestial|Druid|A770LE|16GB|750LE|8GB
#90|142|Watt Chip Power Draw alone each Card different LLMs
#Example|GPT-OSS-20B-F16 very nice low Wattage
#needlonger fullworking|MathTutorF16|142 Watt
#Use|multible Models better Workflow
#All|Hardware Modded not Stock Compareable
#PLS|watch Cooling Dust Free System
#Single|Dual dGPUs AMD Ryzen 2600 2700x i7 6700K Z170 RAM 16GiB 128GiB
#Intel|iGPU XE Alder Lake Gen 12700H 12650H A730m 12GiB 32GB DDR4|5 RAM
#Core|Ultra|7|155H|MeteorLake|8|Core|Xe-LPG|128EU|1024Alu|ARC|11.5GiBVram
#Quad|Channel|Bandwith|RAM|Gear2|718GB|s
#11.5|GiBVRAMsharedDDR5xLPRAM
#155H|i7|GPT|OSS|20B|F16.gguf|low|30|Watt|allinone|with|mod
#BF16|Models|NOT|recommended|FOR|Alchemist
#|6-16GiB+|F16|Model|START-END
#|6GiB+|F16|GPU|A730m|A380|A570m
#1.5B_Math_Tutor-GGUF-F16 03.09|GiB
#math-professor-3B-GGUF-F16 06.18|GiB
#Neumind-Math-7B-Instruct.nhbeJrd8.F16.gguf 9GIB
#Neumind-Math-7B-Instruct.F16 14.2 GIB
#EVA-GPT-Germa-v7B-Q6_K.gguf 05.50|GiB
#OpenMath-Mistral-7B-v0.1-hf_Q6_K
#kani-tts-400m-en-f16_q8_0.gguf |00.53|GiB|FAST|CTX|NPG|8K|A770LE:|588.6 Pt|s 62.4 Gt|s100w2.4Ghz|FIRESTARTER
#baidu.ERNIE-4.5-0.3B-Base-PT.f16.gguf |00.69|GiB|FAST|CTX|NPG|8K|A770LE:|469.7 Pt|s 52.5 Gt|s97w2.4Ghz+|Mid
#MedScholar-1.5B-f16_q8_0.gguf |02.10|GiB|FAST|CTX|NPG|8k|A770LE:|528.2 Pt|s 25.2 Gt|s109w2.4Ghz-|HiQ
#Qwen2.5-VL-3B-Instruct-f16-q4_k.gguf |02.10|GiB|FAST|CTX|NPG|8k|A770LE:|613.4 Pt|s 14.5 Gt|s120w2.4Ghz-|CPU
#yasserrmd.DentaInstruct-1.2B.f16.gguf |02.20|GiB|SLOW|CTX|NPG|8k|A770LE:|35.5 Pt|s 8.8 Gt|s90W2.3Ghz+|FULLHiQ
#DeepCoder-1.5B-Preview-f16_q8_0.ggufUSE BETTER|02.20|GiB|FAST|CTX|NPG|8k|A770LE:|513.2 Pt|s 23.3 Gt|s112w2.3Ghz+|Mid
#ibm-granite.granite-4.0-1b.f16.ggufNO-FUNKTION|03.00|GiB|NOTS|CTX|NPG|0k|A770LE:|569.4 Pt|s 18.2 Gt|s120w2.3Ghz-|GPU-INF
#Lucy-1.7B-F16.gguf LOVE TWO AFTER MATHTUTOR7B|03.20|GiB|FAST|CTX|NPG|65k|A770LE:|320.7 Pt|s 22.2 Gt|s108w2.4Ghz-|EXT
#granite-4.0-micro-f16_q8_0.gguf |04.60|GiB|SLOW|CTX|NPG|8k|A770LE:|35.5 Pt|s 8.8 Gt|s90W2.3Ghz+|FULL|HiQ
#gemma-2-2b-it.F16.gguf |04.90|GiB|FAST|CTX|NPG|8k|A770LE:|613.4 Pt|s 14.5 Gt|s120w2.4Ghz-|CPU
#|8GiB+|GPU|A750LE
#Fathom-Search-4B-f16_q8_0.gguf |05.50|GiB|FAST|CTX|NPG|8k|A770LE:|569.4 Pt|s 18.2 Gt|s118w2.4Ghz-|Think
#Qwen2.5-7B-Instruct-f16-q4_k.gguf |05.70|GiB|FAST|CTX|NPG|8k|A770LE:|511.5 Pt|s 19.7 Gt|s142w2.4Ghz-|CPU
#Qwen2.5-VL-3B-Instruct-f16.gguf |05.80|GiB|FAST|CTX|NPG|8k|A770LE:|613.4 Pt|s 14.5 Gt|s120w2.4Ghz-|CPU
#llama3bthinkingonly5B.f16.gguf |06.00|GiB|SLOW|CTX|NPG|8k|A770LE:|35.5 Pt|s 8.8 Gt|s90W2.3Ghz+|FULL|HiQ
#|10-12GiB+|iGPU|XeLPG|A730m|A580|B570|B580|PRO|A|B60|A|B50
#UIGEN-X-4B-0729-f16_q8_0.gguf |06.20|GiB|SLOW|CTX|NPG|8k|A770LE:|35.5 Pt|s 8.8 Gt|s90W2.3Ghz+|FULL|HiQ
#granite-4.0-h-tiny-f16_q8_0.gguf |07.00|GiB|SLOW|CTX|NPG|8k|A770LE:|35.5 Pt|s 8.8 Gt|s90W2.3Ghz+|FULL|HiQ
#Trinity-Nano-Preview-f16_q8_0.gguf |07.20|GiB|SLOW|CTX|NPG|8k|A770LE:|35.5 Pt|s 8.8 Gt|s90W2.3Ghz+|FULL|HiQ
#Qwen3-Embedding-4B-f16.gguf |07.50|GiB|SLOW|CTX|NPG|8k|A770LE:|35.5 Pt|s 8.8 Gt|s90W2.3Ghz+|FULL|HiQ
#Qwen3-4B-f16.gguf |07.50|GiB|FAST|CTX|NPG|8k|A770LE:|613.4 Pt|s 14.5 Gt|s120w2.4Ghz-|LThink
#Nemotron-Mini-4B-Instruct-f16.gguf |07.80|GiB|FAST|CTX|NPG|8k|A770LE:|717.8 Pt|s 17.8 Gt|s118w2.4Ghz-|
#Minitron-4B-Base.FP16.gguf |07.80|GiB|FAST|CTX|NPG|4k|A770LE:|764.3 Pt|s 16.3 Gt|s131w2.4Ghz+|MID
#t5-v1_1-xxl-encoder-f16.gguf |08.90|GiB|FAST|CTX|NPG|8k|A770LE:|361.8 Pt|s 6 Gt|s101w2.4Ghz-|NICE
#|16GiB+|GPU|A770LE|iGPU|Meteor|Lake
#DiffuCoder-7B-cpGRPO-f16_q8_0.gguf NOT GOOD |10.50|GiB|SLOW|CTX|NPG|8k|A770LE:|35.5 Pt|s 8.8 Gt|s90W2.3Ghz+|FULL|HiQ
#MiMo-Embodied-7B-f16_q8_0.gguf |10.70|GiB|SLOW|CTX|NPG|8k|A770LE:|35.5 Pt|s 8.8 Gt|s90W2.3Ghz+|FULL|HiQ
#MiniCPM4.1-8B-f16_q8_0.gguf |11.00|GiB|FAST|CTX|NPG|8k|A770LE:|842.9 Pt|s 11.0 Gt|s142w2.4Ghz+|MidThink
#KernelLLM-f16_q8_0.gguf |11.10|GiB|FAST|CTX|NPG|8k|A770LE:|688.5 Pt|s 11.2 Gt|s137w2.4Ghz-|MATHKERN
#Jan-v2-VL-high-f16_q8_0.gguf |11.40|GiB|FAST|CTX|NPG|8k|A770LE:|639.6 Pt|s 10.2 Gt|s135w2.4Ghz-|Think
#Nemotron-Orchestrator-8B-f16_q8_0.gguf |11.40|GiB|SLOW|CTX|NPG|8k|A770LE:|35.5 Pt|s 8.8 Gt|s90W2.3Ghz+|FULL|HiQ
#Orchestrator-8B-f16_q8_0.gguf |11.40|GiB|FAST|CTX|NPG|8k|A770LE:|640.4 Pt|s 10.2 Gt|s134w2.4Ghz-|LThink
#MiroThinker-v1.0-8B-f16_q8_0.gguf |11.40|GiB|SLOW|CTX|NPG|8k|A770LE:|35.5 Pt|s 8.8 Gt|s90W2.3Ghz+|FULL|HiQ
#Seed-Coder-8B-Reasoning-f16_q8_0.gguf |11.50|GiB|SLOW|CTX|NPG|8k|A770LE:|35.5 Pt|s 8.8 Gt|s90W2.3Ghz+|FULL|HiQ
#Ministral-3-8B-Reasoning-2512-f16_q8_0.gguf |11.70|GiB|SLOW|CTX|NPG|8k|A770LE:|35.5 Pt|s 8.8 Gt|s90W2.3Ghz+|FULL|HiQ
#ggml-model-f16.gguf |12.60|GiB|FAST|CTX|NPG|4k|A770LE:|1012.7 Pt|s 13.5 Gt|s142w2.4Ghz-|NotStable
#gpt-oss-20b-F16.gguf |12.80|GiB|SLOW|CTX|NPG|8k|A770LE:|35.5 Pt|s 8.8 Gt|s90W2.3Ghz+|FULL|HiQ
#Navid-AI.Yehia-7B-preview.f16.gguf |13.00|GiB|FAST|CTX|NPG|4k|A770LE:|1273.4 Pt|s 13.4 Gt|s142w2.4Ghz-|HiQ
#Mistral-7B-Instruct-v0.3.fp16.gguf |13.50|GiB|SLOW|CTX|NPG|8k|A770LE:|35.5 Pt|s 8.8 Gt|s90W2.3Ghz+|FULL|HiQ
#allenai.Olmo-3-7B-Think.f16.gguf |13.60|GiB|SLOW|CTX|NPG|8k|A770LE:|35.5 Pt|s 8.8 Gt|s90W2.3Ghz+|FULL|HiQ
#Mamba-Codestral-7B-v0.1-F16.gguf |13.60|GiB|SLOW|CTX|NPG|8k|A770LE:|110.1 Pt|s 3.2 Gt|s97w2.4Ghz+|FULL|GOOD
#MathTutor-7B-H_v0.0.1.f16.gguf |14.20|GiB|FAST|CTX|20k!|NPG|512k|A770LE:|467.7|12t|s142w2.4Ghz|BEST|HiQ!
#|END|F16|MODEL|LIST
#START|Q8-Q4-IQ4-2|MODEL|LIST|NOT|F16|6GiB+|GPU|A730m|A380|A380
#phi-2.Q4_K_M.gguf |01.70|GiB|FAST|CTX|NPG|8k|A770LE:|888.6 Pt|s 25.4 Gt|s128w2.4Ghz-|EXT|N1
#openhermes-2.5-mistral-7b.Q4_K_M.gguf |04.10|GiB|FAST|CTX|NPG|8k|A770LE:|613.4 Pt|s 14.5 Gt|s120w2.4Ghz-|CPU
#mistral-7b-instruct-v0.2.Q4_K_M.gguf |04.10|GiB|SLOW|CTX|NPG|8k|A770LE:|35.5 Pt|s 8.8 Gt|s90W2.3Ghz+|FULL|HQ
#|8GiB+|GPU|A750LE
#OpenMath-Mistral-7B-v0.1-hf_Q6_K.gguf |05.50|GiB|FAST|CTX|NPG|8k|A770LE:|1233.9 Pt|s 14.4 Gt|s145w 2.4Ghz-|OLDSC
#NVIDIA-Nemotron-Nano-12B-v2-IQ4_NL.gguf |06.60|GiB|SLOW|CTX|NPG|8k|A770LE:|35.5 Pt|s 8.8 Gt|s90W2.3Ghz+|FULL|HiQ
#wizardcoder-python-7b-v1.0.Q8_0.gguf |06.70|GiB|SLOW|CTX|NPG|8k|A770LE:|35.5 Pt|s 8.8 Gt|s90W2.3Ghz+|FULL|HiQ
#sauerkrautlm-7b-v1.Q8_0.gguf |06.70|GiB|FAST|CTX|NPG|16k|512k|A770LE:|1364.6 Pt|s12.1 Gt|s142w2.4Ghz-|CPU
#|10-12GiB+|iGPU|XeLPG|A730m|A580|B570|B580|PRO|A|B60|A|B50
#Qwen3-16B-A3B-IQ4_NL.gguf |08.50|GiB|FAST|CTX|NPG|8k|A770LE:|613.4 Pt|s 14.5 Gt|s120w2.4Ghz-|CPU
#Qwen3-30B-A3B-UD-IQ2_XXS.gguf |09.70|GiB|FAST|CTX|NPG|8k|A770LE:|613.4 Pt|s 14.5 Gt|s120w2.4Ghz-|CPU
#solar-10.7b-instruct-v1.0-uncensored.Q8_0.gguf|10.60|GiB|FAST|CTX|NPG|8k|A770LE:|985.6 Pt|s 7.5 Gt|s135w2.4Ghz-|CPU|VIP
#gpt-oss-20b-claude-4-distill.MXFP4_MOE.gguf |11.30|GiB|SLOW|CTX|NPG|8k|A770LE:|35.4 Pt|s 8.7 Gt|s92W2.2Ghz+|FULL|CPU
#gpt-oss-20b-mxfp4.gguf |11.30|GiB|SLOW|CTX|NPG|8k|A770LE:|35.5 Pt|s 8.8 Gt|s90W2.3Ghz+|FULL|HiQ
#velara-11b-v2.Q8_0.gguf |11.30|GiB|FAST|CTX|NPG|8k|A770LE:|613.4 Pt|s 14.5 Gt|s 120w2.4Ghz-|CPU
#|16-24GiB+|A770LE|B60
#flux1-kontext-dev-Q8_0.gguf |11.80|GiB|NO|SUPPORT|CTX|NPG|8k|A770LE:|985.6 Pt|s7.5 Gt|s135w2.4Ghz-|ViP
#Deepseek-Coder-V2-Lite-13B |11.00|GiB|NO|SUPPORT|CTX|NPG|8k|A770LE:|985.6 Pt|s7.5 Gt|s135w2.4Ghz-|ViP
#Instruct-sft-s1K.i1-Q6_K.gguf |13.10|GiB|FAST|CTX|NPG|8k|A770LE:|22.7 Pt|s7.9 Gt|s98W2.4Ghz-|OK
#ENDE
#VectorAdd
#max. x2 or x3
#Workgroups for Consumer
#42 (2x42 or 3x42)
#Work_Items
#21504
#Workgroup_Size
#512
#SIMD
#32
#Threads per Workgroup
#16
#Threads
#672
#Free Open Source Download of XAIGPUARC:
#https://github.com/alucianOriginal/XAIGPUARC/blob/main/XAIGPUARC.sh
#0.|TRIOINFERNAL:
#1.|XAIGPUARC-sycl-ggml|Treiber/Umgebung
#2.|Scheduler-sycl-ggml|Daten|32-bit/VektorFormation
#3.|FlashAttention-sycl-ggml|VRAM/Bandbreite/Zwischenspeicher/Cache
#|AUTOMATOR|INTERPRATOR|IMPRESSOR|IMPERATOR|INTERPREDATOR
#|IMPETRATOR|IMPRESSATOR|INTERPREDATOR|EDIT-I-ON
#|AU-TO-MA-TOR-IT|GE-H-IRN-O-MAT|EI-WEISS-COM-PUTER
#|USE WISELY AND CAREFULLY PLS|CHECK THE ANSWERS|MAYBE EZ WITH OTHER AI KI
#No Cherry Picking 14.2GiB Layer with MathTutor-7B-F16.gguf Demo with XAIGPUARC on A770LE 16GiB i915:
#TESTPROMPTS: every time the same big test prompt
#First:
#... (truncated)
#cij = ∑k=1n aik ⋅ bkj; optimize with SYCL kernel, use FP16 32-bit precision for alignment and zero-copy focus.
#[ Prompt: 1080,3 t/s | Generation: 13,2 t/s ]
#> >>> CHAT HERE! ENTER ANSWER > CHAT ENTER ANSWER > CHAT
#Second:
#... (truncated)
#cij = ∑k=1n aik ⋅ bkj; optimize with SYCL, use FP16 32-bit precision, focus on zero-copy alignment.
#[ Prompt: 1202,1 t/s | Generation: 13,6 t/s ]
#> CHAT HERE! ENTER ANSWER > CHAT ENTER ANSWER > CHAT
#Third:
#|? ... (truncated)
#cij = ∑k=1n aik ⋅ bkj; optimize with SYCL; use FP16 32-bit precision; focus on zero-copy alignment.
#[ Prompt: 1163,3 t/s | Generation: 13,6 t/s ]
#> >>> CHAT HERE! ENTER ANSWER > CHAT ENTER ANSWER > CHAT
#Four:
#|? ... (truncated)
#cij = ∑k=1naik⋅bkj; SYCL kernel for optimization; use FP16 32-bit precision; focus on zero-copy alignment.
#[ Prompt: 1183,8 t/s | Generation: 13,5 t/s ]
#> #> CHAT HERE! ENTER ANSWER > CHAT ENTER ANSWER > CHAT
#Five:
#|? ... (truncated)
#cij = ∑k=1naik⋅bkj; optimize with SYCL for FP16 32-bit precision, use icpx -fsycl -O3
#Float@TARGET=SYCL, vector intrinsics for alignment-zero-copy focus.
#[ Prompt: 1176,3 t/s | Generation: 13,5 t/s ]
#> Here can stand your TEXT!!!
#Six:
#On another day, with more work done in between, let us see the surprise: I think we got slower on average.
#|? ... (truncated)
#cij = ∑k=1n aik ⋅ bkj; SYCL kernel for optimization; prefer FP16 32-bit precision, zero-copy alignment.
#[ Prompt: 1200,3 t/s | Generation: 13,6 t/s ]
#> OK, not really :-) Happy AI!
#Seven:
#|? ... (truncated)
#cij = ∑k=1naik⋅bkj; SYCL kernel for FP16 32-bit precision; Use icpx -fsycl -O3
#Float@TARGET=SYCL for optimization; Zero-copy alignment preferred.
#[ Prompt: 1179,5 t/s | Generation: 13,6 t/s ]
#> THIS IS THE CHATLINE! USE UIT (INFORMATION-TECHNOLOGY)
#Eight:
#|? ... (truncated)
#cij = ∑k=1n aik ⋅ bkj;
#SYCL kernel optimization; FP16 32-bit precision; alignment-zero-copy-focus.
#[ Prompt: 1182,2 t/s | Generation: 13,7 t/s ]
#> EIGHT TIMES SIMILAR :-) All the other devices scale to well-known values that are not worth talking about
#With the Xe driver you will get roughly 16-17 generated tokens per second.
#16k-20480 CONTEXT_SIZE 131072 PREDICT_TOKENS-///Only F16 models are recommended!
#ai,
#local,
#costs,
#energy,
#benchmark,
#realworld,
#computer,
#minimum,
#spec,
#low,
#vram,
#gpu,
#cpu,
#igpu,
#dgpu,
#old,
#fast,
#apu,
#arc,
#intel,
#alchemist,
#a750,
#a770,
#a730m,
#a580,
#a570,
#a50,
#a60,
#b580,
#b570,
#b50,
#b60,
#a380,
#a310,
#sycl,
#f16,
#high,
#quality,
#answers,
#gguf,
#ggml,
#it,
#compute,
#mkl,
#itx,
#icpx,
#mem,
#auto,
#search,
#device,
#oneshotapp,
#localai,
#lowspecai,
#precision,
#audit,
#zero,
#rounding,
#error,
Die Aufwertung dreht sich um die ganzen neuen Abhaengigkeiten von den Treibern. Vielleicht bekommt man es jetzt als Interessent gestartet. Bei mir laufen die jetzt wieder ueberall ohne Probleme. Ich habe auch automatisiert, dass es egal ist, welche OneAPI-Version installiert ist; das Programm frisst, was da ist. Salve
Edit: Und natuerlich im Zweifel die GitHub-Version benutzen! Ich weiss nicht, ob der Hive alles ordentlich speichert; das sind nur die Sicherungskopien.
https://github.com/alucianOriginal/XAIGPUARC/blob/main/XAIGPUARC.sh
Ich baue an einer automatischen Dual GPU Erkennung.
Das bedeutet vor allem fuer Laptop-Besitzer, also Workstations und Gaming-Laptops, keine Notebooks, und fuer normale Personal Computer mit einer CPU, welche eine iGPU integriert hat, dass man dickere Modelle laden kann.
Je nach Konfiguration und iGPU kann hier mit einem Teil des gewoehnlichen Arbeitsspeichers gerechnet werden, plus die Unterstuetzung der Hauptgrafikkarte.
Ich baue das so, dass man da nix einstellen muss; das System richtet sich einfach nur nach dem Modell: Wenn das dann zu groß ist, erkennt es das (das ist schon fertig) und teilt die Arbeit einfach auf die beiden Grafikprozessoren auf.
An sich funktioniert die Sache so weit, aber die gute alte Geraeteerkennung auf SYCL-Basis mit zwei statt einer GPU zu implementieren, ist nicht so einfach.
Bei einer GPU muss das Programm auch zweimal fragen, weil das "zu schnell" geht und es ohne zweite Abfrage einfach abbrechen wuerde.
Vielleicht muss ich bei zwei GPUs dann dreimal abfragen. Auf jeden Fall ist das ein dicker Klotz, und meine Arbeit geht gut voran, weil ich so langsam irgendwie wieder sowas wie einen Kopf zum Denken habe, nachdem so viele Baustellen inzwischen halbwegs abgearbeitet sind.
Bei Fragen, einfach Fragen fragen.
Salve
Alucian