From d65599b1c3916fa0063985b5e7b34c902b208e16 Mon Sep 17 00:00:00 2001 From: ramir1 Date: Thu, 18 Sep 2025 18:31:55 +0200 Subject: [PATCH 1/3] Installing the Ukrainian Lemmatizer Installing the Ukrainian Lemmatizer --- Dockerfile | 43 ++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 42 insertions(+), 1 deletion(-) diff --git a/Dockerfile b/Dockerfile index 770d07b..698f034 100644 --- a/Dockerfile +++ b/Dockerfile @@ -70,7 +70,7 @@ RUN if [ "$TARGETPLATFORM" = "linux/arm64" ] ; then export ARCH="arm"; else expo && wget -q https://repo.manticoresearch.com/manticore-dev-repo.noarch.deb \ && dpkg -i manticore-dev-repo.noarch.deb \ && apt-key adv --fetch-keys 'https://repo.manticoresearch.com/GPG-KEY-manticore' && apt-get -y update \ - && apt-get -y install manticore manticore-extra manticore-load manticore-language-packs;\ + && apt-get -y install manticore manticore-extra manticore-load manticore-lemmatizer-uk manticore-language-packs;\ elif [ ! -z "$DAEMON_URL" ]; then \ echo "2nd step of building release image for linux/${ARCH}64 architecture" \ && echo "ARCH: ${ARCH}" \ @@ -98,6 +98,47 @@ RUN if [ -d "/packages/" ]; then apt -y install /packages/*deb; fi \ && tar -xf /tmp/ru.pak.tgz -C /usr/share/manticore/ \ && rm /tmp/*.pak.tgz +# Installing the Ukrainian Lettimizer +# Installing dependencies for building Python +RUN cd /tmp && \ + apt-get update + +RUN apt-get install -y \ + build-essential \ + libreadline-dev \ + libncursesw5-dev \ + libssl-dev \ + libsqlite3-dev \ + tk-dev \ + libgdbm-dev \ + libc6-dev \ + libbz2-dev \ + libffi-dev \ + zlib1g-dev + +# Download and compile Python 3.9 +WORKDIR /tmp +RUN wget https://www.python.org/ftp/python/3.9.4/Python-3.9.4.tgz && \ + tar xzf Python-3.9.4.tgz + +WORKDIR /tmp/Python-3.9.4 +RUN ./configure --enable-optimizations --enable-shared && \ + make -j$(nproc) altinstall + +# Updating the linker cache +RUN ldconfig + +# Installing pymorphy2 and the Ukrainian dictionary +RUN LD_LIBRARY_PATH=/tmp/Python-3.9.4
pip3.9 install pymorphy2[fast] && \ + LD_LIBRARY_PATH=/tmp/Python-3.9.4 pip3.9 install pymorphy2-dicts-uk + +RUN rm -rf /tmp/Python-3.9.4* /tmp/manticore-repo.noarch.deb && \ + apt-get clean && \ + rm -rf /var/lib/apt/lists/* + +WORKDIR /var/lib/manticore +# END: Installing the Ukrainian Lettimizer + COPY manticore.conf.sh /etc/manticoresearch/ RUN sed -i '/log = \/var\/log\/manticore\/searchd.log/d;/query_log = \/var\/log\/manticore\/query.log/d' /etc/manticoresearch/manticore.conf RUN md5sum /etc/manticoresearch/manticore.conf | awk '{print $1}' > /manticore.conf.md5 From c1eeede15ffacc22530cd87cdfc9d2d695fcb745 Mon Sep 17 00:00:00 2001 From: djklim87 Date: Thu, 16 Oct 2025 18:33:04 +0200 Subject: [PATCH 2/3] Fix: Add Ukrainian lemmatizer --- Dockerfile | 52 ++++--------------- clt_tests/tests/test-ukrainian-morphology.rec | 35 +++++++++++++ 2 files changed, 45 insertions(+), 42 deletions(-) create mode 100644 clt_tests/tests/test-ukrainian-morphology.rec diff --git a/Dockerfile b/Dockerfile index f94d470..df7431f 100644 --- a/Dockerfile +++ b/Dockerfile @@ -10,7 +10,7 @@ RUN groupadd -r manticore && useradd -r -g manticore manticore ENV GOSU_VERSION 1.11 -ENV DAEMON_URL=${DAEMON_URL:-"https://repo.manticoresearch.com/repository/manticoresearch_jammy/dists/jammy/main/binary-_ARCH_64/manticore-server_13.13.0-25100704-e5465fe44__ARCH_64.deb \ +ENV DAEMON_URL ${DAEMON_URL:-"https://repo.manticoresearch.com/repository/manticoresearch_jammy/dists/jammy/main/binary-_ARCH_64/manticore-server_13.13.0-25100704-e5465fe44__ARCH_64.deb \ https://repo.manticoresearch.com/repository/manticoresearch_jammy/dists/jammy/main/binary-_ARCH_64/manticore-server-core_13.13.0-25100704-e5465fe44__ARCH_64.deb \ https://repo.manticoresearch.com/repository/manticoresearch_jammy/dists/jammy/main/binary-_ARCH_64/manticore-backup_1.9.6+25070510-5247d066_all.deb \ 
https://repo.manticoresearch.com/repository/manticoresearch_jammy/dists/jammy/main/binary-_ARCH_64/manticore-buddy_3.35.1+25090418-41d9811f_all.deb \ @@ -98,46 +98,14 @@ RUN if [ -d "/packages/" ]; then apt -y install /packages/*deb; fi \ && tar -xf /tmp/ru.pak.tgz -C /usr/share/manticore/ \ && rm /tmp/*.pak.tgz -# Installing the Ukrainian Lettimizer -# Installing dependencies for building Python -RUN cd /tmp && \ - apt-get update - -RUN apt-get install -y \ - build-essential \ - libreadline-dev \ - libncursesw5-dev \ - libssl-dev \ - libsqlite3-dev \ - tk-dev \ - libgdbm-dev \ - libc6-dev \ - libbz2-dev \ - libffi-dev \ - zlib1g-dev - -# Download and compile Python 3.9 -WORKDIR /tmp -RUN wget https://www.python.org/ftp/python/3.9.4/Python-3.9.4.tgz && \ - tar xzf Python-3.9.4.tgz - -WORKDIR /tmp/Python-3.9.4 -RUN ./configure --enable-optimizations --enable-shared && \ - make -j$(nproc) altinstall - -# Updating the linker cache -RUN ldconfig - -# Installing pymorphy2 and the Ukrainian dictionary -RUN LD_LIBRARY_PATH=/tmp/Python-3.9.4 pip3.9 install pymorphy2[fast] && \ - LD_LIBRARY_PATH=/tmp/Python-3.9.4 pip3.9 install pymorphy2-dicts-uk - -RUN rm -rf /tmp/Python-3.9.4* /tmp/manticore-repo.noarch.deb && \ - apt-get clean && \ - rm -rf /var/lib/apt/lists/* - -WORKDIR /var/lib/manticore -# END: Installing the Ukrainian Lettimizer +# Ukrainian lemmatizer: Python 3.9 from the deadsnakes PPA, pip bootstrapped via get-pip.py, then pymorphy2 with its Ukrainian dictionaries (single layer, apt lists and pip cache not kept) +RUN apt-get update && apt-get install -y software-properties-common curl && \ + add-apt-repository -y ppa:deadsnakes/ppa && \ + apt-get update && \ + apt-get install -y python3.9 python3.9-dev python3.9-distutils && \ + curl -fsSL https://bootstrap.pypa.io/get-pip.py | python3.9 && \ + python3.9 -m pip install --no-cache-dir pymorphy2 pymorphy2-dicts-uk && \ + apt-get clean && rm -rf /var/lib/apt/lists/* COPY manticore.conf.sh /etc/manticoresearch/ RUN sed -i '/log = \/var\/log\/manticore\/searchd.log/d;/query_log = \/var\/log\/manticore\/query.log/d' /etc/manticoresearch/manticore.conf @@
-163,7 +131,7 @@ EXPOSE 9308 EXPOSE 9312 ENV LANG C.UTF-8 ENV LC_ALL C.UTF-8 -ENV MANTICORE_CONFIG="/etc/manticoresearch/manticore.conf.sh|/etc/manticoresearch/manticore.conf" +ENV MANTICORE_CONFIG "/etc/manticoresearch/manticore.conf.sh|/etc/manticoresearch/manticore.conf" CMD ["searchd", "-c", "/etc/manticoresearch/manticore.conf.sh", "--nodetach"] # How to build manually: diff --git a/clt_tests/tests/test-ukrainian-morphology.rec b/clt_tests/tests/test-ukrainian-morphology.rec new file mode 100644 index 0000000..043e13a --- /dev/null +++ b/clt_tests/tests/test-ukrainian-morphology.rec @@ -0,0 +1,35 @@ +––– block: ./base/init ––– +––– input ––– +docker exec manticore mysql -h0 -P9306 -e "CREATE TABLE test_uk (id bigint, content text) rt_mem_limit = '256M' morphology = 'lemmatize_uk' charset_table = '0..9, A..Z->a..z, _, a..z, U+0410..U+042F->U+0430..U+044F, U+0430..U+044F, U+0454, U+0456, U+0457, U+0491';" +––– output ––– +––– input ––– +docker exec manticore mysql -h0 -P9306 -e "INSERT INTO test_uk (id, content) VALUES (1, 'бігаю'), (2, 'муркотіти'), (3, 'їжа');" +––– output ––– +––– input ––– +docker exec manticore mysql -h0 -P9306 -e "CALL KEYWORDS('бігаю', 'test_uk');" +––– output ––– ++------+------------+--------------+ +| qpos | tokenized | normalized | ++------+------------+--------------+ +| 1 | бігаю | бігати | ++------+------------+--------------+ +––– input ––– +docker exec manticore mysql -h0 -P9306 -e "CALL KEYWORDS('їжа', 'test_uk');" +––– output ––– ++------+-----------+------------+ +| qpos | tokenized | normalized | ++------+-----------+------------+ +| 1 | їжа | їжа | ++------+-----------+------------+ +––– input ––– +docker exec manticore mysql -h0 -P9306 -e "SELECT * FROM test_uk WHERE MATCH('бігати');" +––– output ––– ++------+---------+ +| id | content | ++------+---------+ +| 1 | бігаю | ++------+---------+ +––– input ––– +docker stop manticore +––– output ––– +#!/[0-9a-z]+/!# From 43dbde42b298370ce33293678ea94db307551bbd Mon Sep 17 
00:00:00 2001 From: djklim87 Date: Fri, 17 Oct 2025 04:08:40 +0200 Subject: [PATCH 3/3] Feat: Fix error reporting --- Dockerfile | 1 + clt_tests/tests/test-ukrainian-morphology.rec | 18 +++++++++++++----- 2 files changed, 14 insertions(+), 5 deletions(-) diff --git a/Dockerfile b/Dockerfile index df7431f..1e593c8 100644 --- a/Dockerfile +++ b/Dockerfile @@ -131,6 +131,7 @@ EXPOSE 9308 EXPOSE 9312 ENV LANG C.UTF-8 ENV LC_ALL C.UTF-8 +ENV PYTHONWARNINGS "ignore::UserWarning:pymorphy2.analyzer" ENV MANTICORE_CONFIG "/etc/manticoresearch/manticore.conf.sh|/etc/manticoresearch/manticore.conf" CMD ["searchd", "-c", "/etc/manticoresearch/manticore.conf.sh", "--nodetach"] diff --git a/clt_tests/tests/test-ukrainian-morphology.rec b/clt_tests/tests/test-ukrainian-morphology.rec index 043e13a..948fc50 100644 --- a/clt_tests/tests/test-ukrainian-morphology.rec +++ b/clt_tests/tests/test-ukrainian-morphology.rec @@ -1,5 +1,13 @@ ––– block: ./base/init ––– ––– input ––– +docker run -d --name manticore manticoresoftware/manticore:current +––– output ––– +#!/[0-9a-z]+/!# +––– input ––– +if timeout 5 grep -qm1 'accepting connections' <(docker logs -f manticore); then echo 'accepting connections'; else echo 'Manticore failed to start properly in 5 seconds'; fi +––– output ––– +accepting connections +––– input ––– docker exec manticore mysql -h0 -P9306 -e "CREATE TABLE test_uk (id bigint, content text) rt_mem_limit = '256M' morphology = 'lemmatize_uk' charset_table = '0..9, A..Z->a..z, _, a..z, U+0410..U+042F->U+0430..U+044F, U+0430..U+044F, U+0454, U+0456, U+0457, U+0491';" ––– output ––– ––– input ––– @@ -24,11 +32,11 @@ docker exec manticore mysql -h0 -P9306 -e "CALL KEYWORDS('їжа', 'test_uk');" ––– input ––– docker exec manticore mysql -h0 -P9306 -e "SELECT * FROM test_uk WHERE MATCH('бігати');" ––– output ––– -+------+---------+ -| id | content | -+------+---------+ -| 1 | бігаю | -+------+---------+ ++------+------------+ +| id | content | ++------+------------+ +| 1 | бігаю
| ++------+------------+ ––– input ––– docker stop manticore ––– output –––