mirror of
https://github.com/FliegendeWurst/ripgrep-all.git
synced 2024-11-08 14:00:37 +00:00
Initial work on mbox extractor
This commit is contained in:
parent
5fa777605d
commit
9642552fa3
142
Cargo.lock
generated
142
Cargo.lock
generated
@ -25,7 +25,7 @@ version = "0.7.20"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "cc936419f96fa211c1b9166887b38e5e40b19958e5b895be7c1f93adec7071ac"
|
||||
dependencies = [
|
||||
"memchr",
|
||||
"memchr 2.5.0",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
@ -85,7 +85,7 @@ dependencies = [
|
||||
"flate2",
|
||||
"futures-core",
|
||||
"futures-io",
|
||||
"memchr",
|
||||
"memchr 2.5.0",
|
||||
"pin-project-lite 0.2.9",
|
||||
"tokio 0.2.25",
|
||||
"tokio 0.3.7",
|
||||
@ -179,6 +179,12 @@ version = "1.1.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "d468802bab17cbc0cc575e9b053f41e72aa36bfa6b7f55e3529ffa43161b97fa"
|
||||
|
||||
[[package]]
|
||||
name = "base64"
|
||||
version = "0.13.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "9e1b586273c5702936fe7b7d6896644d8be71e6314cfe09d3167c95f712589e8"
|
||||
|
||||
[[package]]
|
||||
name = "bincode"
|
||||
version = "1.3.3"
|
||||
@ -188,6 +194,12 @@ dependencies = [
|
||||
"serde",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "bitflags"
|
||||
version = "0.9.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "4efd02e230a02e18f92fc2735f44597385ed02ad8f831e7c1c1156ee5e1ab3a5"
|
||||
|
||||
[[package]]
|
||||
name = "bitflags"
|
||||
version = "1.3.2"
|
||||
@ -266,6 +278,12 @@ dependencies = [
|
||||
"pkg-config",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "casing"
|
||||
version = "0.1.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "8069a314fdf61ab368081307e6d351431b2a1a04822eba87834230a238cb93c4"
|
||||
|
||||
[[package]]
|
||||
name = "cc"
|
||||
version = "1.0.79"
|
||||
@ -281,6 +299,16 @@ version = "1.0.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd"
|
||||
|
||||
[[package]]
|
||||
name = "charset"
|
||||
version = "0.1.3"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "18e9079d1a12a2cc2bffb5db039c43661836ead4082120d5844f02555aca2d46"
|
||||
dependencies = [
|
||||
"base64",
|
||||
"encoding_rs",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "chrono"
|
||||
version = "0.4.23"
|
||||
@ -304,7 +332,7 @@ checksum = "a0610544180c38b88101fecf2dd634b174a62eef6946f84dfc6a7127512b381c"
|
||||
dependencies = [
|
||||
"ansi_term",
|
||||
"atty",
|
||||
"bitflags",
|
||||
"bitflags 1.3.2",
|
||||
"strsim 0.8.0",
|
||||
"textwrap",
|
||||
"unicode-width",
|
||||
@ -317,7 +345,7 @@ version = "4.1.6"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "ec0b0588d44d4d63a87dbd75c136c166bbfd9a86a31cb89e09906521c7d3f5e3"
|
||||
dependencies = [
|
||||
"bitflags",
|
||||
"bitflags 1.3.2",
|
||||
"clap_lex",
|
||||
"is-terminal",
|
||||
"strsim 0.10.0",
|
||||
@ -486,6 +514,12 @@ dependencies = [
|
||||
"syn",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "data-encoding"
|
||||
version = "2.3.3"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "23d8666cb01533c39dde32bcbab8e227b4ed6679b2c925eba05feabea39508fb"
|
||||
|
||||
[[package]]
|
||||
name = "derive_more"
|
||||
version = "0.99.17"
|
||||
@ -966,7 +1000,7 @@ version = "0.14.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "447a296f7aca299cfbb50f4e4f3d49451549af655fb7215d7f8c0c3d64bad42b"
|
||||
dependencies = [
|
||||
"bitflags",
|
||||
"bitflags 1.3.2",
|
||||
"byteorder",
|
||||
"libc",
|
||||
"lmdb-rkv-sys",
|
||||
@ -1013,6 +1047,41 @@ dependencies = [
|
||||
"pkg-config",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "mailbox"
|
||||
version = "0.2.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "602ce3fa1224e605e1c9949ff090baf17e3c84e8d87123ee31d838a2ed5ed9f5"
|
||||
dependencies = [
|
||||
"bitflags 0.9.1",
|
||||
"casing",
|
||||
"chrono",
|
||||
"fnv",
|
||||
"mime",
|
||||
"nom 3.2.1",
|
||||
"owning_ref",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "mailparse"
|
||||
version = "0.14.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "6b56570f5f8c0047260d1c8b5b331f62eb9c660b9dd4071a8c46f8c7d3f280aa"
|
||||
dependencies = [
|
||||
"charset",
|
||||
"data-encoding",
|
||||
"quoted_printable",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "memchr"
|
||||
version = "1.0.2"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "148fab2e51b4f1cfc66da2a7c32981d1d3c083a803978268bb11fe4b86925e7a"
|
||||
dependencies = [
|
||||
"libc",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "memchr"
|
||||
version = "2.5.0"
|
||||
@ -1028,6 +1097,18 @@ dependencies = [
|
||||
"autocfg",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "mime"
|
||||
version = "0.3.16"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "2a60c7ce501c71e03a9c9c0d35b861413ae925bd979cc7a4e30d060069aaac8d"
|
||||
|
||||
[[package]]
|
||||
name = "mime2ext"
|
||||
version = "0.1.52"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "a1a85a5069ebd40e64b1985773cc81addbe9d90d7ecf60e7b5475a57ad584c70"
|
||||
|
||||
[[package]]
|
||||
name = "minimal-lexical"
|
||||
version = "0.2.1"
|
||||
@ -1055,13 +1136,22 @@ dependencies = [
|
||||
"windows-sys 0.45.0",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "nom"
|
||||
version = "3.2.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "05aec50c70fd288702bcd93284a8444607f3292dbdf2a30de5ea5dcdbe72287b"
|
||||
dependencies = [
|
||||
"memchr 1.0.2",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "nom"
|
||||
version = "7.1.3"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "d273983c5a657a70a3e8f2a01329822f3b8c8172b73826411a55751e404a0a4a"
|
||||
dependencies = [
|
||||
"memchr",
|
||||
"memchr 2.5.0",
|
||||
"minimal-lexical",
|
||||
]
|
||||
|
||||
@ -1169,6 +1259,15 @@ dependencies = [
|
||||
"winapi",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "owning_ref"
|
||||
version = "0.3.3"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "cdf84f41639e037b484f93433aa3897863b561ed65c6e59c7073d7c561710f37"
|
||||
dependencies = [
|
||||
"stable_deref_trait",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "parking_lot"
|
||||
version = "0.12.1"
|
||||
@ -1308,13 +1407,19 @@ dependencies = [
|
||||
"proc-macro2",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "quoted_printable"
|
||||
version = "0.4.7"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "a24039f627d8285853cc90dcddf8c1ebfaa91f834566948872b225b9a28ed1b6"
|
||||
|
||||
[[package]]
|
||||
name = "redox_syscall"
|
||||
version = "0.2.16"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "fb5a58c1855b4b6819d59012155603f0b22ad30cad752600aadfcb695265519a"
|
||||
dependencies = [
|
||||
"bitflags",
|
||||
"bitflags 1.3.2",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
@ -1335,7 +1440,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "48aaa5748ba571fb95cd2c85c09f629215d3a6ece942baa100950af03a34f733"
|
||||
dependencies = [
|
||||
"aho-corasick",
|
||||
"memchr",
|
||||
"memchr 2.5.0",
|
||||
"regex-syntax",
|
||||
]
|
||||
|
||||
@ -1381,7 +1486,10 @@ dependencies = [
|
||||
"json_comments",
|
||||
"lazy_static",
|
||||
"log",
|
||||
"memchr",
|
||||
"mailbox",
|
||||
"mailparse",
|
||||
"memchr 2.5.0",
|
||||
"mime2ext",
|
||||
"paste",
|
||||
"path-clean",
|
||||
"pretty-bytes",
|
||||
@ -1411,7 +1519,7 @@ checksum = "a6006704273063c72952370ad236b8d58556dcc4f99a95ced4d9ad40f3e80a69"
|
||||
dependencies = [
|
||||
"arrayref",
|
||||
"bincode",
|
||||
"bitflags",
|
||||
"bitflags 1.3.2",
|
||||
"byteorder",
|
||||
"id-arena",
|
||||
"lazy_static",
|
||||
@ -1432,7 +1540,7 @@ version = "0.28.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "01e213bc3ecb39ac32e81e51ebe31fd888a940515173e3a18a35f8c6e896422a"
|
||||
dependencies = [
|
||||
"bitflags",
|
||||
"bitflags 1.3.2",
|
||||
"fallible-iterator",
|
||||
"fallible-streaming-iterator",
|
||||
"hashlink",
|
||||
@ -1455,7 +1563,7 @@ version = "0.36.8"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "f43abb88211988493c1abb44a70efa56ff0ce98f233b7b276146f1f3f7ba9644"
|
||||
dependencies = [
|
||||
"bitflags",
|
||||
"bitflags 1.3.2",
|
||||
"errno",
|
||||
"io-lifetimes",
|
||||
"libc",
|
||||
@ -1598,6 +1706,12 @@ dependencies = [
|
||||
"winapi",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "stable_deref_trait"
|
||||
version = "1.2.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "a8f112729512f8e442d81f95a8a7ddf2b7c6b8a1a6f509a95864142b30cab2d3"
|
||||
|
||||
[[package]]
|
||||
name = "strsim"
|
||||
version = "0.8.0"
|
||||
@ -1762,7 +1876,7 @@ dependencies = [
|
||||
"autocfg",
|
||||
"bytes 1.4.0",
|
||||
"libc",
|
||||
"memchr",
|
||||
"memchr 2.5.0",
|
||||
"mio",
|
||||
"num_cpus",
|
||||
"parking_lot",
|
||||
@ -1870,7 +1984,7 @@ dependencies = [
|
||||
"bytecount",
|
||||
"fnv",
|
||||
"lazy_static",
|
||||
"nom",
|
||||
"nom 7.1.3",
|
||||
"once_cell",
|
||||
"petgraph",
|
||||
]
|
||||
|
@ -37,7 +37,10 @@ glob = "0.3.0"
|
||||
json_comments = "0.2.1"
|
||||
lazy_static = "1.4.0"
|
||||
log = "0.4.11"
|
||||
mailbox = "0.2.0"
|
||||
mailparse = "0.14.0"
|
||||
memchr = "2.3.3"
|
||||
mime2ext = "0.1.52"
|
||||
paste = "1.0.0"
|
||||
path-clean = "0.1.0"
|
||||
pretty-bytes = "0.2.2"
|
||||
|
217
exampledir/mail_nested.eml
Normal file
217
exampledir/mail_nested.eml
Normal file
@ -0,0 +1,217 @@
|
||||
To: submit.t4eseGWSvG1JST3r@spam.spamcop.net
|
||||
From: 2012gdwu <2012gdwu@posteo.de>
|
||||
Subject: Postbank Spam
|
||||
Autocrypt: addr=2012gdwu@posteo.de; keydata=
|
||||
mDMEXXjwiRYJKwYBBAHaRw8BAQdAmjXRazNXXy5tK05Dwl5mSRbdth9JkQq92V/QVyqjdgm0
|
||||
I0FybmUgS2VsbGVyIDxhcm5lLmtlbGxlckBwb3N0ZW8uZGU+iJYEExYIAD4WIQR2UN3HoAGx
|
||||
KI0B7Eih+UCxBQvPLgUCXXjwiQIbAwUJCWYBgAULCQgHAgYVCgkICwIEFgIDAQIeAQIXgAAK
|
||||
CRCh+UCxBQvPLpPfAP4gs6Oky3+UO2LU2XxweeQO+YEWXK0QtM2+ajzrGaF3HAD+LBfmyB9+
|
||||
Wom2KP0CwxUzI4d6zmiAMSKOnGGgzd65igm4OARdePCJEgorBgEEAZdVAQUBAQdAncxZ3Rox
|
||||
wmvm+/qCkCm9+PU2HmWr08M3qdqkf2L4IngDAQgHiH4EGBYIACYWIQR2UN3HoAGxKI0B7Eih
|
||||
+UCxBQvPLgUCXXjwiQIbDAUJCWYBgAAKCRCh+UCxBQvPLpQkAQCgYOlOftMNi+sfn+XQvfOc
|
||||
ULQWp+cgOBMcyVCdpJEQCwD9HBuwuHobl8FPm0PbRtlCn/7GY4WK+Hh4+3BKmhRn8wU=
|
||||
Message-ID: <1530ae05-33a7-fa40-9473-ca625a14385a@posteo.de>
|
||||
Date: Mon, 20 Jul 2020 07:35:55 +0200
|
||||
User-Agent: Mozilla/5.0 (X11; Linux x86_64; rv:68.0) Gecko/20100101
|
||||
Thunderbird/68.10.0
|
||||
MIME-Version: 1.0
|
||||
Content-Type: multipart/mixed;
|
||||
boundary="------------6670F92201FB126ED9472803"
|
||||
Content-Language: de-DE
|
||||
|
||||
This is a multi-part message in MIME format.
|
||||
--------------6670F92201FB126ED9472803
|
||||
Content-Type: text/plain; charset=utf-8
|
||||
Content-Transfer-Encoding: 7bit
|
||||
|
||||
here you go
|
||||
|
||||
|
||||
--------------6670F92201FB126ED9472803
|
||||
Content-Type: message/rfc822;
|
||||
name="postbank.eml"
|
||||
Content-Transfer-Encoding: 7bit
|
||||
Content-Disposition: attachment;
|
||||
filename="postbank.eml"
|
||||
|
||||
Return-Path: <gxnwgddl@carcarry.de>
|
||||
Delivered-To: arne.keller@posteo.de
|
||||
Received: from proxy02.posteo.name ([127.0.0.1])
|
||||
by dovecot12 (Dovecot) with LMTP id EaKBGxv9FF+9mwEAJesNpQ
|
||||
for <arne.keller@posteo.de>; Mon, 20 Jul 2020 04:15:27 +0200
|
||||
Received: from proxy02.posteo.de ([127.0.0.1])
|
||||
by proxy02.posteo.name (Dovecot) with LMTP id 31UFGtHsFF+T4gMAGFAyLg
|
||||
; Mon, 20 Jul 2020 04:15:27 +0200
|
||||
Received: from mailin05.posteo.de (unknown [10.0.1.5])
|
||||
by proxy02.posteo.de (Postfix) with ESMTPS id 4B950v2JYGz11fk
|
||||
for <arne.keller@posteo.de>; Mon, 20 Jul 2020 04:15:27 +0200 (CEST)
|
||||
Received: from mx03.posteo.de (mailin05.posteo.de [127.0.0.1])
|
||||
by mailin05.posteo.de (Postfix) with ESMTPS id 4270120F15
|
||||
for <arne.keller@posteo.de>; Mon, 20 Jul 2020 04:15:27 +0200 (CEST)
|
||||
X-Virus-Scanned: amavisd-new at posteo.de
|
||||
X-Spam-Flag: NO
|
||||
X-Spam-Score: 2.639
|
||||
X-Spam-Level: **
|
||||
X-Spam-Status: No, score=2.639 tagged_above=-1000 required=8
|
||||
tests=[AV:Heuristics.Phishing.Email.SpoofedDomain=0.1, ALL_TRUSTED=-1,
|
||||
FROM_LOCAL_NOVOWEL=0.5, HK_RANDOM_ENVFROM=0.626, HK_RANDOM_FROM=0.999,
|
||||
HTML_FONT_LOW_CONTRAST=0.001, HTML_IMAGE_ONLY_24=1.282,
|
||||
HTML_MESSAGE=0.001, HTTPS_HTTP_MISMATCH=0.1, POSTEO_GENERICS_IO=0.01,
|
||||
T_FILL_THIS_FORM_SHORT=0.01, T_REMOTE_IMAGE=0.01] autolearn=disabled
|
||||
Received: from mout.web.de (mout.web.de [212.227.15.14])
|
||||
by mx03.posteo.de (Postfix) with ESMTPS id 4B950t696Mz10nB
|
||||
for <arne.keller@posteo.de>; Mon, 20 Jul 2020 04:15:26 +0200 (CEST)
|
||||
Authentication-Results: mx03.posteo.de; dmarc=none (p=none dis=none) header.from=carcarry.de
|
||||
Received: from [212.227.15.17] ([212.227.15.17]) by mx-ha.web.de (mxweb010
|
||||
[212.227.15.17]) with ESMTPS (Nemesis) id 1MRloE-1kQNT22I4w-00T9hm for
|
||||
<arne.keller@posteo.de>; Mon, 20 Jul 2020 04:15:26 +0200
|
||||
Received: from mout.kundenserver.de ([212.227.17.24]) by mx-ha.web.de
|
||||
(mxweb010 [212.227.15.17]) with ESMTPS (Nemesis) id 1MINbE-1k0aRm2Hzw-00EOVM
|
||||
for <2012gdwu@web.de>; Mon, 20 Jul 2020 04:15:26 +0200
|
||||
Received: from 217.160.251.109 ([217.160.251.109]) by mrelayeu.kundenserver.de
|
||||
(mreue107 [212.227.15.183]) with ESMTPSA (Nemesis) id
|
||||
1MPoPd-1kBHRt0o2F-00MqkS for <2012gdwu@web.de>; Mon, 20 Jul 2020 04:15:26
|
||||
+0200
|
||||
From: "=?utf-8?B?UE9TVEJBTs2fS82f?=" <gxnwgddl@carcarry.de>
|
||||
Subject: BsetSign App : Y7P32-HTXU2-FRDG7
|
||||
To: "2012gdwu" <2012gdwu@web.de>
|
||||
Content-Type: multipart/alternative; boundary="QHebeB08yNTYquFAhtQnxv=_cOW4Xd528c"
|
||||
MIME-Version: 1.0
|
||||
Date: Mon, 20 Jul 2020 02:15:26 +0000
|
||||
Message-ID: <1M3lHZ-1jyAPt0pTn-000u1I@mrelayeu.kundenserver.de>
|
||||
X-Provags-ID: V03:K1:68TECBVA88ZKh8HcSl/N+ElwlecL1tc+1AuDDyqm9em66WO295R
|
||||
IfuHqA9uG7+Vlyr99v+OneGltnr43KfsgRKj9GgOpDj2QelHphKFGPILAvvsQ8vOq6ucC2W
|
||||
BW3NEOh3JhitB6o4xLEmj+dbivC0ie728/cPMcjj6TwyBzw5nT1or8mBZWoEMSF/zcu+PIr
|
||||
gGpFY2puzzURN4oKX82/w==
|
||||
X-Spam-Flag: NO
|
||||
X-UI-Out-Filterresults: notjunk:1;V03:K0:c01ZANnvlk8=:ouSMGue72FUx2PJOSNnmEW
|
||||
qI8A89gf6q3aAdJBhLX1Bhd70xio64ljpha9X5ArOYg6Q2RH1JYyvfBSMoTo3HMy37H3L8kaq
|
||||
ReRCdSPOMD8+llZ/rRpPLl+7PofGOv+Hu3UO7gzgm9v0YqwLZIwh9P2w9TIu+GqVJWeDdmxrs
|
||||
RDPeHY8lsRL+8AFeSGNiWBYMEHDxKofTqS5Zh7mal1Bm4JbgEEIP36V4oL3c6V1olMHQZzEH9
|
||||
7D0T8U6LyLyfSbuu5M6QN2FZ+F6IDJNDUG1uwNt9K12ESY6TweMR3xInFabiZ9fMPmrjPaNwW
|
||||
hlyKg67tDYL2lfk2fpa/LbhLnlfKEDqSvkgK54CZh+xbIQetju66cZUEFQyCIcGdAOWI8+nty
|
||||
FdbNUzxhNpZTPBrA7H95gRuc0u2GJBfZZsxdp46jpBwG65yqmJ32pkJrATo8CNbBO9A6hpdyL
|
||||
UNu5bavZBJp9dsyY6Cnm6vMOIjJ8qMy/vNkrtRXNWBrnVHhuQZ3B+osG8XWLiyq7s4hFOwDxY
|
||||
WLRgjKL6HgIj+2DLParwiuSsX8TVy5+WhxDUou0UJDzD3C1JmYiryTlo4Vu4CIZFXkgAuAsEq
|
||||
c55M6L2eUmD3xQNaqgMEJFksT2qXWaSb2Qw6HM7mtLBbSUhuWtSv2oeVrNwgx8XWexWYYZYFv
|
||||
KAZzICpkVhxpYIntoKRiDtQZxBDejPwGmne2iG81rn34pGJwOOYojf9dFghodE5bZEqVh6KbA
|
||||
f/38x9FIoYewzA2WuyngX/bXTdkLQM49W1vdlF5DQOlgYuM8Ni7NeJG888VhDZxcUn6vIIJs3
|
||||
xH0jOWrWCUz0gK9uyyagjcfdXr54Zv1E7i936CTlRq5QnDKN2C9jQFH5ymD4G1W5zX6Xj/05O
|
||||
M7VaU9Y3mvOM/+82zsKc5zJOFOf9MoI5JBhnPjHWeqaJgpYhNoKgGvPo3QfZFwzk/MHH2PgB1
|
||||
PLGvjSE8u/cpYeGhJdzTXM00J9ai5yGRNFD71zHoHBOFGCpmZVnJJ8SD+qUd4K4BfSD+DJ5Qd
|
||||
t1wsCpH5bgodnXgMcN6Zj0q3P/ODk3dnah1hsYMyIWDBFZ0cTlp2QkYhAKZh1HM5WcfSc5UwU
|
||||
SrcK9HHiG7BKOFYA1r6Rx5YYqwGWeGxr9mlH7MLyfCwI8PlWtfeB7Pj4eEI1hLy9GMnHBCJDj
|
||||
W8o1yDeE54rgWHR7CtIF6w+qF+quA3ZdwVSPOHwQeH7vS4OaJjeEyeeT4YOJdIMI7UknEasAG
|
||||
LfMS/PKWx7+YcUNaz0xvO70NwZj1FKJuWqDS6ZTciMSvGkEFTWVOqn5nPlHi8hDbBTVn70aPa
|
||||
BQi3U68hgdDpJIHlVLLvRcaCYYly3L60NQBgJroag4fRiIvDUSXfDatrDYOv+L4xBYdB3GP+s
|
||||
wqtsPY82YOwXP5KlRMPVEZcuWX5tWiOuaNjePbEkXpE2iQZUqfkDQTYNUGZR+TTBqHOWjO7R3
|
||||
hORQB0gOwe85gZv80G1EL32EtRjVxJxQfrHGPCGXb8HRXbvGGV3Xu3wZEE8iuJngBUJtWeDBq
|
||||
q61rYwZxVuml72lfRM6Lo+OGLAsyqvobxujY9BHpokZH4FNlUstjUoPANTGoAhM+MyQb0fSAV
|
||||
8HA/r6n0oJh0B8+2AxJvVokbhEbL/RlJIZIYpCeRceeA+jjBaR7EvuglUoLN3CcB9CrdDH/qz
|
||||
ymHzEjPVnFar3/sqRjeKyIk71z4yotOKCPQcdD1gTbYWehZiIJwAlDFSpfPdFTQLOJMWd3wuD
|
||||
0mHLep6tLtCY+hjhCYWlTyKKQ8CWiBWPTql21bPp7XVWCfc+4u8kZi5Y3dg3pvpSwwmcyRisX
|
||||
+7+8a+pBzN4VOEuX+dzglKDrNd6h2OL0tBMnk1yqAV27dX9cMRrO941IvtiaZO90BjZtV92oP
|
||||
XkGxvKnGQuynHus/3yblaw==
|
||||
|
||||
This is a multi-part message in MIME format
|
||||
|
||||
--QHebeB08yNTYquFAhtQnxv=_cOW4Xd528c
|
||||
Content-Type: text/plain; charset="utf-8"
|
||||
Content-Transfer-Encoding: quoted-printable
|
||||
Content-Disposition: inline
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
Sehr geehrter Herr / Frau =E2=80=A6,
|
||||
|
||||
Ab dem 20. Jul 2020 aktualisiert die Postbank alle BestSign-Anwendung=
|
||||
en.
|
||||
|
||||
|
||||
|
||||
=C3=96ffnen Sie den unten stehenden Aktivierungslink, um am Upgrade t=
|
||||
eilzunehmen. Verkn=C3=BCpfung
|
||||
|
||||
|
||||
|
||||
|
||||
https://meine.postbank.de/#/login
|
||||
|
||||
|
||||
|
||||
|
||||
Wir empfehlen dringend, dieses Upgrade durchzuf=C3=BChren.
|
||||
|
||||
Reundliche Gr=C3=BC=C3=9Fe,
|
||||
|
||||
=C2=A9 2020 Postbank=E2=80=93 eine Niederlassung der Deutsche Bank AG=
|
||||
|
||||
|
||||
Hypnotiseur/zertifizierter Hypnosecoach (DVH)
|
||||
Burnoutpr=C3=A4ventionscoach
|
||||
Modeberater f=C3=BCr Ma=C3=9Fhemden/Ma=C3=9Fblusen
|
||||
Kurs/Seminarleiter Waldbaden/Waldcoach
|
||||
Am Wiesengrund 5
|
||||
24980 Schafflund
|
||||
Tel.: 04639-98475
|
||||
Mob.: 015117317305
|
||||
Home : www.hypnosepraxis-im-norden.de
|
||||
Home : www.masshemden-im-norden.de
|
||||
Home : www.waldbaden-zwischen-den-meeren.de
|
||||
|
||||
|
||||
--QHebeB08yNTYquFAhtQnxv=_cOW4Xd528c
|
||||
Content-Type: text/html; charset="utf-8"
|
||||
Content-Transfer-Encoding: quoted-printable
|
||||
Content-Disposition: inline
|
||||
|
||||
<html><head></head><body><p><img width=3D"174" height=3D"51" alt=3D"" =
|
||||
src=3D"https://upload.wikimedia.org/wikipedia/commons/thumb/d/d1/Postb=
|
||||
ank-Logo.svg/1200px-Postbank-Logo.svg.png"></p><p><br></p>
|
||||
<div>
|
||||
<div> Sehr geehrter Herr / Frau =E2=80=A6,</div>
|
||||
<div> Ab dem 20. Jul 2020 aktualisiert die Postbank alle BestSign=
|
||||
-Anwendungen.<br><br></div>
|
||||
<div> =C3=96ffnen Sie den unten stehenden Aktivierungslink, um am=
|
||||
Upgrade teilzunehmen. Verkn=C3=BCpfung</div><div><br></div>
|
||||
<div> <a href=3D"https://www.astcdubai.com/.well-known/.re/">http=
|
||||
s://meine.postbank.de/#/login</a></div><div><br></div>
|
||||
<div> Wir empfehlen dringend, dieses Upgrade durchzuf=C3=BChren.<=
|
||||
/div>
|
||||
<div> Reundliche Gr=C3=BC=C3=9Fe,</div>
|
||||
<div> <strong>=C2=A9</strong> 2020 <strong>Postbank</strong>=E2=80=
|
||||
=93 eine Niederlassung der Deutsche Bank AG<br><br> <span style=3D"col=
|
||||
or: rgb(255, 255, 255);">Hypnotiseur/zertifizierter Hypnosecoach (DVH)=
|
||||
</span><br><span style=3D"color: rgb(255, 255, 255);"> Burnoutpr=C3=A4=
|
||||
ventionscoach</span><br><span style=3D"color: rgb(255, 255, 255);"> Mo=
|
||||
deberater f=C3=BCr Ma=C3=9Fhemden/Ma=C3=9Fblusen</span><br><span style=
|
||||
=3D"color: rgb(255, 255, 255);"> Kurs/Seminarleiter Waldbaden/Waldcoac=
|
||||
h</span><br><span style=3D"color: rgb(255, 255, 255);"> Am Wiesengrund=
|
||||
5</span><br><span style=3D"color: rgb(255, 255, 255);"> 24980 Schaffl=
|
||||
und</span><br><span style=3D"color: rgb(255, 255, 255);"> Tel.: 04639-=
|
||||
98475</span><br><span style=3D"color: rgb(255, 255, 255);"> Mob.: 0151=
|
||||
17317305</span><br><span style=3D"color: rgb(255, 255, 255);"> Home : =
|
||||
<a style=3D"color: rgb(255, 255, 255);" href=3D"https://deref-gmx.net/=
|
||||
mail/client/Pk7kcpLwLpI/dereferrer/?redirectUrl=3Dhttp%3A%2F%2Fwww.hyp=
|
||||
nosepraxis-im-norden.de" target=3D"_blank" rel=3D"noopener">www.hypnos=
|
||||
epraxis-im-norden.de</a></span><br><span style=3D"color: rgb(255, 255,=
|
||||
255);"> Home : <a style=3D"color: rgb(255, 255, 255);" href=3D"https:=
|
||||
//deref-gmx.net/mail/client/KR0VAuy5YPo/dereferrer/?redirectUrl=3Dhttp=
|
||||
%3A%2F%2Fwww.masshemden-im-norden.de" target=3D"_blank" rel=3D"noopene=
|
||||
r">www.masshemden-im-norden.de</a></span><br><span style=3D"color: rgb=
|
||||
(255, 255, 255);"> Home : <a style=3D"color: rgb(255, 255, 255);" href=
|
||||
=3D"https://deref-gmx.net/mail/client/QTybHixMVsI/dereferrer/?redirect=
|
||||
Url=3Dhttp%3A%2F%2Fwww.waldbaden-zwischen-den-meeren.de" target=3D"_bl=
|
||||
ank" rel=3D"noopener">www.waldbaden-zwischen-den-meeren.de</a></span><=
|
||||
/div>
|
||||
</div></body></html>
|
||||
|
||||
|
||||
--QHebeB08yNTYquFAhtQnxv=_cOW4Xd528c--
|
||||
|
||||
|
||||
--------------6670F92201FB126ED9472803--
|
7546
exampledir/mail_pdf_attach.eml
Normal file
7546
exampledir/mail_pdf_attach.eml
Normal file
File diff suppressed because it is too large
Load Diff
@ -1,6 +1,7 @@
|
||||
pub mod custom;
|
||||
pub mod decompress;
|
||||
pub mod ffmpeg;
|
||||
pub mod mbox;
|
||||
pub mod postproc;
|
||||
use std::sync::Arc;
|
||||
pub mod sqlite;
|
||||
@ -115,6 +116,7 @@ pub fn get_all_adapters(custom_adapters: Option<Vec<CustomAdapterConfig>>) -> Ad
|
||||
Arc::new(ffmpeg::FFmpegAdapter::new()),
|
||||
Arc::new(zip::ZipAdapter::new()),
|
||||
Arc::new(decompress::DecompressAdapter::new()),
|
||||
Arc::new(mbox::MboxAdapter::new()),
|
||||
Arc::new(tar::TarAdapter::new()),
|
||||
Arc::new(sqlite::SqliteAdapter::new()),
|
||||
];
|
||||
|
@ -99,7 +99,7 @@ lazy_static! {
|
||||
name: "pandoc".to_string(),
|
||||
description: "Uses pandoc to convert binary/unreadable text documents to plain markdown-like text".to_string(),
|
||||
version: 3,
|
||||
extensions: strs(&["epub", "odt", "docx", "fb2", "ipynb"]),
|
||||
extensions: strs(&["epub", "odt", "docx", "fb2", "ipynb", "html", "htm"]),
|
||||
binary: "pandoc".to_string(),
|
||||
mimetypes: None,
|
||||
// simpler markdown (with more information loss but plainer text)
|
||||
|
184
src/adapters/mbox.rs
Normal file
184
src/adapters/mbox.rs
Normal file
@ -0,0 +1,184 @@
|
||||
use crate::adapted_iter::one_file;
|
||||
|
||||
use super::*;
|
||||
|
||||
use anyhow::Result;
|
||||
use async_stream::stream;
|
||||
use lazy_static::lazy_static;
|
||||
use tokio::io::{BufReader, AsyncReadExt};
|
||||
|
||||
use std::{path::{Path, PathBuf}, sync::Mutex, io::Cursor};
|
||||
|
||||
/// File extensions turned into `FastFileMatcher::FileExtension` entries of `METADATA`.
static EXTENSIONS: &[&str] = &["mbox", "mbx"];

/// MIME types turned into `FileMatcher::MimeType` entries of `METADATA`.
static MIME_TYPES: &[&str] = &["application/mbox"];
|
||||
lazy_static! {
    /// Lazily built description of the mbox adapter: its name, version,
    /// description and the matchers derived from `EXTENSIONS` and `MIME_TYPES`.
    static ref METADATA: AdapterMeta = {
        // One extension matcher per entry of EXTENSIONS.
        let fast_matchers = EXTENSIONS
            .iter()
            .map(|ext| FastFileMatcher::FileExtension(String::from(*ext)))
            .collect();
        // One MIME type matcher per entry of MIME_TYPES.
        let slow_matchers = MIME_TYPES
            .iter()
            .map(|mime| FileMatcher::MimeType(String::from(*mime)))
            .collect();

        AdapterMeta {
            name: String::from("mbox"),
            version: 1,
            description: String::from(
                "Reads mailbox files and runs extractors on the contents and attachments.",
            ),
            recurses: true,
            fast_matchers,
            slow_matchers: Some(slow_matchers),
            disabled_by_default: true,
            keep_fast_matchers_if_accurate: true,
        }
    };
}
|
||||
/// Adapter for mbox mailbox files. It is a unit struct and carries no state.
#[derive(Default)]
pub struct MboxAdapter;

impl MboxAdapter {
    /// Creates the adapter; equivalent to `MboxAdapter::default()`.
    pub fn new() -> Self {
        Self
    }
}
|
||||
impl GetMetadata for MboxAdapter {
|
||||
fn metadata(&self) -> &AdapterMeta {
|
||||
&METADATA
|
||||
}
|
||||
}
|
||||
|
||||
/// Derives the name of the file contained in `filename` by dropping its last
/// extension. The shorthand archive extensions `tgz`, `tbz` and `tbz2` are
/// replaced by `.tar`; every other extension is simply removed.
///
/// Panics with "no filename given?" if `filename` has no file stem.
///
/// NOTE(review): within this file the function is only called from the unit
/// tests, not from the mbox adapter itself.
fn get_inner_filename(filename: &Path) -> PathBuf {
    let stem = filename
        .file_stem()
        .expect("no filename given?")
        .to_string_lossy();
    // A missing or non-UTF-8 extension can never be one of the tar shorthands.
    let suffix = match filename.extension().and_then(|ext| ext.to_str()) {
        Some("tgz") | Some("tbz") | Some("tbz2") => ".tar",
        _ => "",
    };
    filename.with_file_name(format!("{}{}", stem, suffix))
}
|
||||
|
||||
impl FileAdapter for MboxAdapter {
|
||||
fn adapt(&self, ai: AdaptInfo, _detection_reason: &FileMatcher) -> Result<AdaptedFilesIterBox> {
|
||||
println!("running mbox adapter");
|
||||
let AdaptInfo {
|
||||
filepath_hint,
|
||||
mut inp,
|
||||
line_prefix,
|
||||
archive_recursion_depth,
|
||||
config,
|
||||
postprocess,
|
||||
..
|
||||
} = ai;
|
||||
|
||||
let mut content = String::new();
|
||||
let s = stream! {
|
||||
inp.read_to_string(&mut content).await?;
|
||||
|
||||
let mut ais = vec![];
|
||||
for mail in content.split("\nFrom ") {
|
||||
|
||||
let mail_bytes = mail.as_bytes(); // &content[offset..offset2];
|
||||
let mail_content = mail_bytes.splitn(2, |x| *x == b'\n').skip(1).next().unwrap();
|
||||
let mail = mailparse::parse_mail(mail_content)?;
|
||||
let mail_body = mail.get_body()?;
|
||||
println!("body {:?}", mail_body);
|
||||
|
||||
let mut path = filepath_hint.clone();
|
||||
println!("{:?}", mail.ctype.mimetype);
|
||||
match &*mail.ctype.mimetype {
|
||||
"text/html" => {
|
||||
path.push("mail.html");
|
||||
},
|
||||
_ => {
|
||||
path.push("mail.txt");
|
||||
}
|
||||
}
|
||||
|
||||
let mut config = config.clone();
|
||||
config.accurate = true;
|
||||
|
||||
let ai2: AdaptInfo = AdaptInfo {
|
||||
filepath_hint: path,
|
||||
is_real_file: false,
|
||||
archive_recursion_depth: archive_recursion_depth + 1,
|
||||
inp: Box::pin(Cursor::new(mail_body.into_bytes())),
|
||||
line_prefix: line_prefix.to_string(),
|
||||
config: config,
|
||||
postprocess,
|
||||
};
|
||||
ais.push(ai2);
|
||||
}
|
||||
for a in ais {
|
||||
yield(Ok(a));
|
||||
}
|
||||
};
|
||||
Ok(Box::pin(s))
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    use super::*;
    use crate::test_utils::*;
    use pretty_assertions::assert_eq;

    /// `get_inner_filename` strips the last extension and maps the tar
    /// shorthand extensions to `.tar`.
    #[test]
    fn test_inner_filename() {
        for (a, b) in &[
            ("hi/test.tgz", "hi/test.tar"),
            ("hi/hello.gz", "hi/hello"),
            ("a/b/initramfs", "a/b/initramfs"),
            ("hi/test.tbz2", "hi/test.tar"),
            ("hi/test.tbz", "hi/test.tar"),
            ("hi/test.hi.bz2", "hi/test.hi"),
            ("hello.tar.gz", "hello.tar"),
        ] {
            assert_eq!(get_inner_filename(&PathBuf::from(a)), PathBuf::from(*b));
        }
    }

    /// Feeds a small in-memory mailbox with two plain-text messages to the
    /// adapter and checks that both bodies show up in the adapted output.
    /// (The previous `gz` / `pdf_gz` tests passed gzip files to this adapter
    /// and expected decompressed output, which an mbox adapter cannot produce.)
    #[tokio::test]
    async fn two_plain_text_mails() -> Result<()> {
        let adapter = MboxAdapter;

        let mbox = concat!(
            "From - Mon Feb 27 12:05:46 2023\n",
            "Subject: first\n",
            "Content-Type: text/plain; charset=UTF-8\n",
            "\n",
            "hello\n",
            "\n",
            "From - Mon Feb 27 12:06:56 2023\n",
            "Subject: second\n",
            "Content-Type: text/plain; charset=UTF-8\n",
            "\n",
            "world\n",
        );
        // The path is only a hint; the input itself comes from memory.
        let filepath = PathBuf::from("test.mbx");

        let (a, d) = simple_adapt_info(&filepath, Box::pin(Cursor::new(mbox.as_bytes().to_vec())));
        let r = adapter.adapt(a, &d)?;
        let o = String::from_utf8(adapted_to_vec(r).await?)?;
        assert!(o.contains("hello"), "first mail body missing in {:?}", o);
        assert!(o.contains("world"), "second mail body missing in {:?}", o);
        Ok(())
    }
}
|
86
test.mbx
Normal file
86
test.mbx
Normal file
@ -0,0 +1,86 @@
|
||||
From
|
||||
Message-ID: <55a23774-4da7-057c-77a7-ec390fed487b@posteo.de>
|
||||
Date: Mon, 27 Feb 2023 12:05:46 +0100
|
||||
MIME-Version: 1.0
|
||||
User-Agent: Mozilla/5.0 (X11; Linux x86_64; rv:102.0) Gecko/20100101
|
||||
Thunderbird/102.8.0
|
||||
From: Arne Keller <2012gdwu@web.de>
|
||||
Subject: From encoding test
|
||||
To: arne.keller@posteo.de
|
||||
Content-Language: de-DE
|
||||
X-Enigmail-Draft-Status: N00200
|
||||
X-Mozilla-Draft-Info: internal/draft; vcard=0; receipt=0; DSN=0; uuencode=0;
|
||||
attachmentreminder=0; deliveryformat=0
|
||||
X-Identity-Key: id2
|
||||
Fcc: imap://2012gdwu@imap.web.de/Gesendet
|
||||
Content-Type: text/html; charset=UTF-8
|
||||
Content-Transfer-Encoding: 7bit
|
||||
|
||||
<html>
|
||||
<head>
|
||||
<meta http-equiv="content-type" content="text/html; charset=UTF-8">
|
||||
</head>
|
||||
<body>
|
||||
<p>>From</p>
|
||||
<p>Another word >From<br>
|
||||
</p>
|
||||
</body>
|
||||
</html>
|
||||
From
|
||||
Message-ID: <55a23774-4da7-057c-77a7-ec390fed487b@posteo.de>
|
||||
Date: Mon, 27 Feb 2023 12:06:56 +0100
|
||||
MIME-Version: 1.0
|
||||
User-Agent: Mozilla/5.0 (X11; Linux x86_64; rv:102.0) Gecko/20100101
|
||||
Thunderbird/102.8.0
|
||||
From: Arne Keller <2012gdwu@web.de>
|
||||
Subject: From encoding test
|
||||
To: arne.keller@posteo.de
|
||||
Content-Language: de-DE
|
||||
X-Enigmail-Draft-Status: N00200
|
||||
X-Mozilla-Draft-Info: internal/draft; vcard=0; receipt=0; DSN=0; uuencode=0;
|
||||
attachmentreminder=0; deliveryformat=1
|
||||
X-Identity-Key: id2
|
||||
Fcc: imap://2012gdwu@imap.web.de/Gesendet
|
||||
Content-Type: text/html; charset=UTF-8
|
||||
Content-Transfer-Encoding: 7bit
|
||||
|
||||
<html>
|
||||
<head>
|
||||
<meta http-equiv="content-type" content="text/html; charset=UTF-8">
|
||||
</head>
|
||||
<body>
|
||||
<p>>From</p>
|
||||
<p>Another word >From<br>
|
||||
</p>
|
||||
</body>
|
||||
</html>
|
||||
From - Mon Feb 27 12:06:57 2023
|
||||
X-Mozilla-Status: 0001
|
||||
X-Mozilla-Status2: 00000000
|
||||
Message-ID: <55a23774-4da7-057c-77a7-ec390fed487b@posteo.de>
|
||||
Date: Mon, 27 Feb 2023 12:06:56 +0100
|
||||
MIME-Version: 1.0
|
||||
User-Agent: Mozilla/5.0 (X11; Linux x86_64; rv:102.0) Gecko/20100101
|
||||
Thunderbird/102.8.0
|
||||
From: Arne Keller <2012gdwu@web.de>
|
||||
Subject: From encoding test
|
||||
To: arne.keller@posteo.de
|
||||
Content-Language: de-DE
|
||||
X-Enigmail-Draft-Status: N00200
|
||||
X-Mozilla-Draft-Info: internal/draft; vcard=0; receipt=0; DSN=0; uuencode=0;
|
||||
attachmentreminder=0; deliveryformat=1
|
||||
X-Identity-Key: id2
|
||||
Fcc: imap://2012gdwu@imap.web.de/Gesendet
|
||||
Content-Type: text/html; charset=UTF-8
|
||||
Content-Transfer-Encoding: 7bit
|
||||
|
||||
<html>
|
||||
<head>
|
||||
<meta http-equiv="content-type" content="text/html; charset=UTF-8">
|
||||
</head>
|
||||
<body>
|
||||
<p>>From</p>
|
||||
<p>Another word >From<br>
|
||||
</p>
|
||||
</body>
|
||||
</html>
|
Loading…
Reference in New Issue
Block a user