Fix: Email Parsing for html fix (#3659)

This commit is contained in:
Tejaswini Chile
2022-01-21 05:25:35 +05:30
committed by GitHub
parent a7c947aeae
commit e997aaceb7
3 changed files with 85 additions and 28 deletions

View File

@@ -8,30 +8,49 @@ class MailPresenter < SimpleDelegator
end
# Returns the mail subject passed through encode_to_unicode.
# NOTE(review): the two calls below look like the removed and added versions
# of one line from a diff rendered without +/- markers. As written, the first
# call's result is discarded and only the second is returned — confirm which
# revision is intended.
def subject
encode_to_unicode(@mail.subject || '')
encode_to_unicode(@mail.subject)
end
# Builds the text-body hash (:full, :reply, :quoted) for the mail, or returns
# {} when the selected text body is blank.
# NOTE(review): this span looks like the removed and added lines of a diff
# rendered together without +/- markers — the hash lists full/reply/quoted
# twice and the first `quoted:` entry has no trailing comma, so it does not
# parse as written. Confirm which revision is intended before relying on it.
def text_content
@decoded_text_content ||= encode_to_unicode(text_part&.decoded || decoded_message || '')
@decoded_text_content = select_body(text_part) || ''
# Remember the body's encoding so the trimmed text can be re-tagged with it below.
encoding = @decoded_text_content.encoding
body = EmailReplyTrimmer.trim(@decoded_text_content)
return {} if @decoded_text_content.blank?
@text_content ||= {
full: @decoded_text_content,
reply: extract_reply(@decoded_text_content)[:reply],
quoted: extract_reply(@decoded_text_content)[:quoted_text]
full: select_body(text_part),
reply: @decoded_text_content,
# The EmailReplyTrimmer output is re-tagged with the original encoding and converted to UTF-8.
quoted: body.force_encoding(encoding).encode('UTF-8')
}
end
# Chooses the body text to use for the given mail part.
#
# Gives back '' when no part is supplied. Otherwise the part is decoded and
# passed through encode_to_unicode; that text is returned as-is when the mail
# has a text part. When it has none and the mail's content type mentions
# text/html, the text is handed to ::HtmlParser.parse_reply instead. In any
# other case the result is nil.
def select_body(mail_part)
  if mail_part
    unicode_body = encode_to_unicode(mail_part.decoded)
    return unicode_body if mail.text_part

    html_mail = (mail.content_type || '').include?('text/html')
    ::HtmlParser.parse_reply(unicode_body) if html_mail
  else
    ''
  end
end
# Builds the HTML-body hash (:full, :reply, :quoted) for the mail, or returns
# {} when the selected HTML body is blank.
# NOTE(review): this span looks like the removed and added lines of a diff
# rendered together without +/- markers — the hash lists full/reply/quoted
# twice and the first `quoted:` entry has no trailing comma, so it does not
# parse as written. Confirm which revision is intended before relying on it.
def html_content
@decoded_html_content ||= encode_to_unicode(html_part&.decoded)
@decoded_html_content = select_body(html_part) || ''
return {} if @decoded_html_content.blank?
# The EmailReplyTrimmer output is stored under :quoted below.
body = EmailReplyTrimmer.trim(@decoded_html_content)
@html_content ||= {
full: @decoded_html_content,
reply: extract_reply(@decoded_html_content)[:reply],
quoted: extract_reply(@decoded_html_content)[:quoted_text]
full: select_body(html_part),
reply: @decoded_html_content,
quoted: body
}
end
@@ -47,14 +66,6 @@ class MailPresenter < SimpleDelegator
end
end
# Decoded body of the mail.
#
# A non-multipart mail is decoded as a whole. A multipart mail yields the
# decoded text part, or nil when it has no text part.
def decoded_message
  return mail.decoded unless mail.multipart?

  mail.text_part.decoded if mail.text_part
end
# How many attachments the mail carries, as reported by `count` on
# mail.attachments.
def number_of_attachments
  attachment_list = mail.attachments
  attachment_list.count
end
@@ -114,13 +125,7 @@ class MailPresenter < SimpleDelegator
return str if current_encoding == 'UTF-8'
str.encode(current_encoding, 'UTF-8', invalid: :replace, undef: :replace, replace: '?')
end
# Splits mail content into reply and quoted text.
#
# No real reply parser is implemented yet: both keys carry the stripped
# content.
#
# @param content [String, nil] decoded mail body
# @return [Hash] { reply: String, quoted_text: String }; both values are ''
#   when the content cannot be stripped (e.g. it is nil)
def extract_reply(content)
  # NOTE: implement the reply parser over here
  stripped = content.strip
  # dup keeps the two values independent objects, as two separate strip calls would.
  { reply: stripped, quoted_text: stripped.dup }
rescue StandardError
  # Keep the Hash shape on failure: callers index the result with [:reply] /
  # [:quoted_text], which raises TypeError on a bare String.
  { reply: '', quoted_text: '' }
end
end

View File

@@ -30,14 +30,14 @@ Content-Type: text/html; charset="UTF-8"
Content-Transfer-Encoding: quoted-printable
<div dir=3D"ltr">Yes, I am providing you step how to reproduce this issue</=
div><br><div class=3D"gmail_quote"><div dir=3D"ltr" class=3D"gmail_attr">On=
div><br>class=3D"gmail_quote"><div dir=3D"ltr" class=3D"gmail_attr">On=
Thu, Aug 19, 2021 at 2:07 PM Tejaswini from Email sender test &l=
t;<a href=3D"mailto:tejaswini@chatwoot.com">tejaswini@chatwoot.com</a>&gt; wrot=
e:<br></div><blockquote class=3D"gmail_quote" style=3D"margin:0px 0px 0px 0=
.8ex;border-left:1px solid rgb(204,204,204);padding-left:1ex"> <p>
</p><p>Any update on this?</p>
</p>Any update on this?</p>
<p></p>
<p>
</blockquote></div><br clear=3D"all"><div><br></div>-- <br><div dir=3D"ltr"=
class=3D"gmail_signature"><div dir=3D"ltr"><div><div dir=3D"ltr"><div><div=
><b>Sony Mathew.</b><br></div><span style=3D"font-family:&quot;times ne=

View File

@@ -96,5 +96,57 @@ RSpec.describe ReplyMailbox, type: :mailbox do
expect(conversation_1.messages.last.content).to eq("Let's talk about these images:")
end
end
# Exercises the mailbox with `mail_with_quote` (defined outside this view) and
# checks what ends up on the conversation's last message.
context 'with quotes in email' do
let(:described_subject) { described_class.receive mail_with_quote }
before do
# this UUID is hardcoded in the reply.eml, that's why we are updating this
conversation.uuid = '6bdc3f4d-0bec-4515-a284-5d916fdde489'
conversation.save
end
# Checks the message's `content` against the expected plain-text body.
it 'add the mail content as new message on the conversation' do
described_subject
current_message = conversation.messages.last
expect(current_message.content).to eq(
<<-BODY.strip_heredoc.chomp
Yes, I am providing you step how to reproduce this issue
On Thu, Aug 19, 2021 at 2:07 PM Tejaswini from Email sender test < tejaswini@chatwoot.com> wrote:
> Any update on this?
>
>
--
* Sony Mathew*
Software developer
*Mob:9999999999
BODY
)
end
# Checks the same expected text, this time read from
# content_attributes[:email][:text_content][:reply] after reloading the message.
it 'add the mail content as new message on the conversation with broken html' do
described_subject
current_message = conversation.messages.last
expect(current_message.reload.content_attributes[:email][:text_content][:reply]).to eq(
<<-BODY.strip_heredoc.chomp
Yes, I am providing you step how to reproduce this issue
On Thu, Aug 19, 2021 at 2:07 PM Tejaswini from Email sender test < tejaswini@chatwoot.com> wrote:
> Any update on this?
>
>
--
* Sony Mathew*
Software developer
*Mob:9999999999
BODY
)
end
end
end
end