Fixes #5906 - ElasticSearch attachments limit is hardcoded

Fixes #5907 - Attachments over ES limit are silently not indexed
This commit is contained in:
Mantas Masalskis 2026-04-03 18:12:23 +03:00
parent e2aa99b906
commit f9cf9cac77
12 changed files with 281 additions and 173 deletions

View file

@ -32,13 +32,21 @@ services:
command: sleep infinity
elasticsearch:
image: elasticsearch:9.0.4
image: elasticsearch:9.3.2
environment:
- "ES_JAVA_OPTS=-Xms512m -Xmx512m"
- "ES_JAVA_OPTS=-Xms1024m -Xmx1024m"
- discovery.type=single-node
- xpack.security.enabled=false
- http.cors.enabled=true
- http.cors.allow-origin=/https:\/\/app.elasticvue.com/
# Uncomment and change the following settings if you need to allow larger payloads, e.g. for debugging or testing.
# Max length of 100MB translates to ~40MB for allowed attachments, due to base64 encoding overhead.
# Indexing pressure limit of 10% of heap memory is enough for ~50MB attachments (see a ES_JAVA_OPTS above).
# Keep in mind to keep these limits aligned with the following Zammad settings:
# `es_attachment_max_size_in_mb=10` and `es_total_max_size_in_mb=300`.
# - http.max_content_length=100mb
# - indexing_pressure.memory.limit=10%
# - indexing_pressure.memory.coordinating.limit=10%
restart: unless-stopped
ports:
# Expose the ES service for direct access if needed, but avoid conflicts with host services.

View file

@ -0,0 +1,42 @@
# Copyright (C) 2012-2026 Zammad Foundation, https://zammad-foundation.org/
module CanLookupSearchIndexAttributesWithAttachments
extend ActiveSupport::Concern
def search_index_attachments_lookup(current_size)
attachments.filter_map do |attachment|
next if !search_index_attachment_indexable?(attachment, current_size)
SearchIndexBackend.attachment_to_attributes(attachment)
end
end
private
def search_index_attachment_indexable?(attachment, current_size)
if SearchIndexBackend.attachment_ignored?(attachment)
search_index_attachment_log "Attachment #{attachment.id} is ignored for search index due to its file name or type."
return false
end
if SearchIndexBackend.attachment_too_big?(attachment)
search_index_attachment_log "Attachment #{attachment.id} is ignored for search index due to its file size."
return false
end
if SearchIndexBackend.payload_too_big?(current_size + attachment.content.bytesize)
search_index_attachment_log "Attachment #{attachment.id} is ignored for search index due to total payload size limit."
return false
end
true
end
def search_index_attachment_log(message)
Rails.logger.info message
return if !defined?(Rack) || !defined?(Rack.application) || Rack.application.top_level_tasks.none?
puts message # rubocop:disable Rails/Output
end
end

View file

@ -8,6 +8,7 @@ class KnowledgeBase::Answer < ApplicationModel
include ChecksKbClientNotification
include ChecksKbClientVisibility
include CanCloneAttachments
include CanLookupSearchIndexAttributesWithAttachments
AGENT_ALLOWED_ATTRIBUTES = %i[category_id promoted internal_note].freeze
AGENT_ALLOWED_NESTED_RELATIONS = %i[translations].freeze

View file

@ -41,11 +41,13 @@ class KnowledgeBase::Answer::Translation < ApplicationModel
def search_index_attribute_lookup(include_references: true)
attrs = super
attrs.merge('title' => ActionController::Base.helpers.strip_tags(attrs['title']),
'content' => content&.search_index_attribute_lookup,
'scope_id' => answer.category_id,
'attachment' => answer.attachments_for_search_index_attribute_lookup,
'tags' => answer.tag_list)
attrs['title'] = ActionController::Base.helpers.strip_tags(title)
attrs['content'] = content&.search_index_attribute_lookup
attrs['scope_id'] = answer.category_id
attrs['tags'] = answer.tag_list
attrs['attachment'] = answer.search_index_attachments_lookup(attrs.to_json.bytesize)
attrs
end
def vector_index_data

View file

@ -30,6 +30,7 @@ class Ticket::Article < ApplicationModel
include Ticket::Article::AddsMetadataWhatsapp
include HasTransactionDispatcher
include CanLookupSearchIndexAttributesWithAttachments
belongs_to :ticket, optional: true
has_one :ticket_time_accounting, class_name: 'Ticket::TimeAccounting', foreign_key: :ticket_article_id, dependent: :destroy, inverse_of: :ticket_article

View file

@ -22,11 +22,12 @@ module Ticket::SearchIndex
attributes['checklist'] = checklist.search_index_attribute_lookup(include_references: false)
end
# current payload size
total_size_current = 0
# collect article data
attributes['article'] = []
# current payload size
total_size_current = attributes.to_json.bytesize
Ticket::Article.where(ticket_id: id).limit(1000).find_each(batch_size: 50).each do |article|
# lookup attributes of ref. objects (normally name and note)
@ -34,27 +35,15 @@ module Ticket::SearchIndex
article_attributes_payload_size = article_attributes.to_json.bytesize
next if search_index_attribute_lookup_oversized?(total_size_current + article_attributes_payload_size)
next if SearchIndexBackend.payload_too_big?(total_size_current + article_attributes_payload_size)
# add body size to totel payload size
total_size_current += article_attributes_payload_size
# lookup attachments
article_attributes['attachment'] = []
article_attributes['attachment'] = article.search_index_attachments_lookup(total_size_current)
article.attachments.each do |attachment|
next if search_index_attribute_lookup_file_ignored?(attachment)
next if search_index_attribute_lookup_file_oversized?(attachment, total_size_current)
next if search_index_attribute_lookup_oversized?(total_size_current + attachment.content.bytesize)
# add attachment size to totel payload size
total_size_current += attachment.content.bytesize
article_attributes['attachment'].push search_index_article_attachment_attributes(attachment)
end
total_size_current += article_attributes['attachment'].map { it['_size'] }.sum
attributes['article'].push article_attributes
end
@ -64,42 +53,6 @@ module Ticket::SearchIndex
private
def search_index_attribute_lookup_oversized?(total_size_current)
# if complete payload is to high
total_max_size_in_kb = (Setting.get('es_total_max_size_in_mb') || 300).megabyte
return true if total_size_current >= total_max_size_in_kb
false
end
def search_index_attribute_lookup_file_oversized?(attachment, total_size_current)
return true if attachment.content.blank?
# if attachment size is bigger as configured
attachment_max_size = (Setting.get('es_attachment_max_size_in_mb') || 10).megabyte
return true if attachment.content.bytesize > attachment_max_size
# if complete size is bigger as configured
return true if search_index_attribute_lookup_oversized?(total_size_current + attachment.content.bytesize)
false
end
def search_index_attribute_lookup_file_ignored?(attachment)
return true if attachment.filename.blank?
filename_extention = attachment.filename.downcase
filename_extention.gsub!(%r{^.*(\..+?)$}, '\\1')
# list ignored file extensions
attachments_ignore = Setting.get('es_attachment_ignore') || [ '.png', '.jpg', '.jpeg', '.mpeg', '.mpg', '.mov', '.bin', '.exe' ]
return true if attachments_ignore.include?(filename_extention.downcase)
false
end
def search_index_article_attributes(article)
# lookup attributes of ref. objects (normally name and note)
@ -122,12 +75,4 @@ module Ticket::SearchIndex
article_attributes
end
def search_index_article_attachment_attributes(attachment)
{
'size' => attachment.size,
'_name' => attachment.filename,
'_content' => Base64.encode64(attachment.content).delete("\n"),
}
end
end

View file

@ -1132,4 +1132,30 @@ helper method for making HTTP calls and raising error if response was not succes
'aggregations' => { 'time_buckets' => { 'buckets' => [] } }
}
end
def self.attachment_ignored?(attachment)
return true if attachment.filename.blank?
extension = File.extname(attachment.filename).downcase
Setting.get('es_attachment_ignore').include?(extension)
end
def self.attachment_too_big?(attachment)
return true if attachment.content.blank?
attachment.content.bytesize > Setting.get('es_attachment_max_size_in_mb').megabyte
end
def self.payload_too_big?(new_size)
new_size >= Setting.get('es_total_max_size_in_mb').megabyte
end
def self.attachment_to_attributes(attachment)
{
'_size' => attachment.content.bytesize,
'_name' => attachment.filename,
'_content' => Base64.encode64(attachment.content).delete("\n")
}
end
end

View file

@ -1223,4 +1223,95 @@ RSpec.describe SearchIndexBackend do
expect(result).to include(include(id: organization.id.to_s, type: 'Organization'))
end
end
describe '.attachment_ignored?', searchindex: false do
let(:store) do
create(:store,
object: 'SomeObject',
o_id: 1,
data: 'some content',
filename:)
end
context 'when attachment is indexable', searchindex: false do
let(:filename) { 'test.TXT' }
it 'return false' do
expect(described_class).not_to be_attachment_ignored(store)
end
end
context 'when attachment is not indexable' do
let(:filename) { 'test.bin' }
it 'return true' do
expect(described_class).to be_attachment_ignored(store)
end
end
end
describe '.attachment_too_big?', searchindex: false do
let(:store) do
create(:store,
object: 'SomeObject',
o_id: 1,
data: 'a' * ((1024**2) * 2.4), # with 2.4 mb
filename: 'test.TXT')
end
before do
Setting.set('es_attachment_max_size_in_mb', max_file_size_in_mb)
end
context 'when file is too big' do
let(:max_file_size_in_mb) { 2 }
it 'return true' do
expect(described_class).to be_attachment_too_big(store)
end
end
context 'when file is not too big' do
let(:max_file_size_in_mb) { 3 }
it 'return false' do
expect(described_class).not_to be_attachment_too_big(store)
end
end
end
describe '.payload_too_big?', searchindex: false do
context 'when payload is too big' do
it 'return true' do
expect(described_class).to be_payload_too_big(2.gigabytes)
end
end
context 'when payload is not too big' do
it 'return false' do
expect(described_class).not_to be_payload_too_big(2.megabytes)
end
end
end
describe '.attachment_to_attributes', searchindex: false do
let(:data) { 'some content' }
let(:store) do
create(:store,
object: 'SomeObject',
o_id: 1,
data:,
filename: 'test.TXT')
end
it 'return a hash with attachment attributes' do
expect(described_class.attachment_to_attributes(store)).to eq(
{
'_name' => 'test.TXT',
'_size' => 12,
'_content' => Base64.encode64(data).delete("\n"),
}
)
end
end
end

View file

@ -0,0 +1,93 @@
# Copyright (C) 2012-2026 Zammad Foundation, https://zammad-foundation.org/
RSpec.shared_examples 'CanLookupSearchIndexAttributesWithAttachments' do
subject { create(described_class.name.underscore) }
describe '#search_index_attachments_lookup' do
let(:attachment_1) do
create(:store,
object: described_class.name,
o_id: subject.id,
data: 'a' * ((1024**2) * 2.4), # with 2.4 mb
filename: 'test.TXT')
end
let(:attachment_2) do
create(:store,
object: described_class.name,
o_id: subject.id,
data: 'Some content',
filename: 'example.txt')
end
let(:attachment_3) do
create(:store,
object: described_class.name,
o_id: subject.id,
data: 'Some content',
filename: 'fancy.exe')
end
before do
attachment_1
attachment_2
attachment_3
end
context 'when an attachment is ignored due to file type' do
it 'does not include the attachment in the results' do
expect(subject.search_index_attachments_lookup(0)).to include(
include('_name' => 'example.txt'),
include('_name' => 'test.TXT'),
).and not_include(
include('_name' => 'fancy.exe'),
)
end
end
context 'when an attachment is too big' do
before do
Setting.set('es_attachment_max_size_in_mb', 2)
end
it 'does not include the large attachment in the results' do
expect(subject.search_index_attachments_lookup(0)).to include(
include('_name' => 'example.txt'),
).and not_include(
include('_name' => 'test.TXT'),
)
end
end
context 'when adding the attachment would exceed the total payload size limit' do
context 'when a large attachment exceeds the total payload size limit' do
before do
Setting.set('es_total_max_size_in_mb', 2)
end
it 'does not include the large attachment in the results' do
expect(subject.search_index_attachments_lookup(0)).to include(
include('_name' => 'example.txt'),
).and not_include(
include('_name' => 'test.TXT'),
)
end
end
context 'when the existing payload size is already close to the limit' do
before do
Setting.set('es_total_max_size_in_mb', 3)
end
it 'does not include the attachment that would exceed the limit in the results' do
expect(subject.search_index_attachments_lookup(1024**2)).to include(
include('_name' => 'example.txt'),
).and not_include(
include('_name' => 'test.TXT'),
include('_name' => 'fancy.exe')
)
end
end
end
end
end

View file

@ -4,11 +4,13 @@ require 'rails_helper'
require 'models/concerns/checks_kb_client_notification_examples'
require 'models/concerns/has_tags_examples'
require 'models/contexts/factory_context'
require 'models/concerns/can_lookup_search_index_attributes_with_attachments_examples'
RSpec.describe KnowledgeBase::Answer, current_user_id: 1, type: :model do
subject(:kb_answer) { create(:knowledge_base_answer) }
it_behaves_like 'HasTags'
it_behaves_like 'CanLookupSearchIndexAttributesWithAttachments'
include_context 'factory'

View file

@ -6,6 +6,7 @@ require 'models/concerns/can_be_imported_examples'
require 'models/concerns/can_csv_import_examples'
require 'models/concerns/has_history_examples'
require 'models/concerns/has_object_manager_attributes_examples'
require 'models/concerns/can_lookup_search_index_attributes_with_attachments_examples'
require 'models/ticket/article/has_ticket_contact_attributes_impact_examples'
RSpec.describe Ticket::Article, type: :model do

View file

@ -1799,110 +1799,6 @@ RSpec.describe Ticket, type: :model do
end
end
describe '.search_index_attribute_lookup_oversized?' do
subject!(:ticket) { create(:ticket) }
context 'when payload is ok' do
let(:current_payload_size) { 3.megabytes }
it 'return false' do
expect(ticket.send(:search_index_attribute_lookup_oversized?, current_payload_size)).to be false
end
end
context 'when payload is bigger' do
let(:current_payload_size) { 350.megabytes }
it 'return true' do
expect(ticket.send(:search_index_attribute_lookup_oversized?, current_payload_size)).to be true
end
end
end
describe '.search_index_attribute_lookup_file_oversized?' do
subject!(:store) do
create(:store,
object: 'SomeObject',
o_id: 1,
data: 'a' * ((1024**2) * 2.4), # with 2.4 mb
filename: 'test.TXT')
end
context 'when total payload is ok' do
let(:current_payload_size) { 200.megabytes }
it 'return false' do
expect(ticket.send(:search_index_attribute_lookup_file_oversized?, store, current_payload_size)).to be false
end
end
context 'when total payload is oversized' do
let(:current_payload_size) { 299.megabytes }
it 'return true' do
expect(ticket.send(:search_index_attribute_lookup_file_oversized?, store, current_payload_size)).to be true
end
end
end
describe '.search_index_attribute_lookup_file_ignored?' do
context 'when attachment is indexable' do
subject!(:store_with_indexable_extention) do
create(:store,
object: 'SomeObject',
o_id: 1,
data: 'some content',
filename: 'test.TXT')
end
it 'return false' do
expect(ticket.send(:search_index_attribute_lookup_file_ignored?, store_with_indexable_extention)).to be false
end
end
context 'when attachment is no indexable' do
subject!(:store_without_indexable_extention) do
create(:store,
object: 'SomeObject',
o_id: 1,
data: 'some content',
filename: 'test.BIN')
end
it 'return true' do
expect(ticket.send(:search_index_attribute_lookup_file_ignored?, store_without_indexable_extention)).to be true
end
end
end
describe '.search_index_article_attachment_attributes' do
context 'payload for article' do
subject!(:store_item) do
create(:store,
object: 'SomeObject',
o_id: 1,
data: 'some content',
filename: 'test.TXT')
end
it 'verify count of attributes' do
expect(ticket.send(:search_index_article_attachment_attributes, store_item).count).to be 3
end
it 'verify size' do
expect(ticket.send(:search_index_article_attachment_attributes, store_item)['size']).to eq '12'
end
it 'verify _name' do
expect(ticket.send(:search_index_article_attachment_attributes, store_item)['_name']).to eq 'test.TXT'
end
it 'verify _content' do
expect(ticket.send(:search_index_article_attachment_attributes, store_item)['_content']).to eq 'c29tZSBjb250ZW50'
end
end
end
describe '.search_index_article_attributes' do
context 'payload for attachment' do
subject!(:ticket_article) do