63 lines
1.4 KiB
Python
Executable File
63 lines
1.4 KiB
Python
Executable File
#!/usr/bin/env python3
|
|
#
|
|
# Utility for splitting a large mbox up into several smaller files.
|
|
|
|
import email
|
|
import email.policy
|
|
import mailbox
|
|
import os
|
|
import sys
|
|
|
|
# The script takes exactly two arguments: the input mbox path and the maximum
# size of each output file. Anything else prints the usage line and exits 1.
if len(sys.argv[1:]) != 2:
    print(f"Usage: {sys.argv[0]} <input.mbox> <max size with SI suffix>")
    raise SystemExit(1)
|
|
|
|
# Recognised size suffixes and the number of bytes each one stands for.
# Binary units (KiB/MiB/GiB) are powers of 1024, decimal units (KB/MB/GB)
# are powers of 1000.
suffixes = dict(
    KiB=2 ** 10,
    KB=10 ** 3,
    MiB=2 ** 20,
    MB=10 ** 6,
    GiB=2 ** 30,
    GB=10 ** 9,
)
|
|
|
|
# Parse the size limit (second argument) into a byte count. The expected form
# is an integer immediately followed by one of the keys of `suffixes`,
# e.g. "50MB" or "512KiB".
max_length = 0
for suffix, multiplier in suffixes.items():
    if sys.argv[2].endswith(suffix):
        try:
            max_length = int(sys.argv[2][:-len(suffix)]) * multiplier
        except ValueError:
            # Empty or non-integer prefix ("MB", "1.5GB", "tenKB"): keep
            # max_length at 0 so the error message below is printed instead
            # of an uncaught traceback.
            pass
        break
# Zero means the argument was not understood. Negative limits are rejected as
# well, because the splitting loop compares the running size with
# `>= max_length` and would start a new file for every single message.
if max_length <= 0:
    print(f"Don't understand max length of {sys.argv[2]}")
    sys.exit(1)
|
|
|
|
def factory(f):
    """Parse one stored message into an email message object.

    `f` is the binary file-like object that `mailbox` hands to the message
    factory; its whole content is read and parsed with the SMTP policy.
    """
    return email.message_from_bytes(f.read(), policy=email.policy.SMTP)


# create=False: mailbox.mbox creates a missing file by default, so a mistyped
# input path would silently produce an empty mbox and "split" zero messages.
try:
    input_mbox = mailbox.mbox(sys.argv[1], factory=factory, create=False)
except mailbox.NoSuchMailboxError:
    print(f"No such mbox file: {sys.argv[1]}")
    sys.exit(1)
|
|
# Number of output files opened so far; also used as the next file's suffix.
nth = 0


def gen_output():
    """Open the next numbered output mailbox and return it.

    Increments the module-level counter `nth` and opens
    "<basename of the input path>.<nth>" relative to the current working
    directory, announcing the file name on stdout first.
    """
    global nth
    nth = nth + 1
    path = f"{os.path.basename(sys.argv[1])}.{nth}"
    print(f"Writing {path}...")
    next_box = mailbox.mbox(path)
    return next_box
|
|
|
|
# Copy every message of the input mbox into numbered output files, starting a
# new output file whenever the next message would bring the current one up to
# the size limit.
output_mbox = gen_output()
nbytes = 0  # approximate number of bytes stored in the current output file
nmsgs = len(input_mbox)
output_has_messages = False

# itervalues() parses one message at a time. values() would first build a list
# holding every parsed message, i.e. the whole input mbox in memory.
for i, msg in enumerate(input_mbox.itervalues(), start=1):
    # approximate: only the size of the serialized message itself is counted
    msglen = len(msg.as_bytes())
    nbytes += msglen
    # Rotate only when the current file already holds a message; otherwise a
    # single message at or above the limit would leave an empty file behind.
    if nbytes >= max_length and output_has_messages:
        nbytes = msglen
        output_mbox.close()
        print()
        output_mbox = gen_output()
    output_mbox.add(msg)
    output_has_messages = True
    sys.stdout.write(f"\r{i}/{nmsgs}")
    # The progress line has no newline, so flush to make it show up right away.
    sys.stdout.flush()

output_mbox.close()
input_mbox.close()
print(f"\nSplit {nmsgs} messages into {nth} mbox files.")
|