lists.sr.ht/contrib/mbox-split

63 lines
1.4 KiB
Python
Executable File

#!/usr/bin/env python3
#
# Utility for splitting a large mbox up into several smaller files.
import email
import email.policy
import mailbox
import os
import sys
# Exactly two arguments are required: the input mbox and the size limit.
if len(sys.argv) != 3:
    print(f"Usage: {sys.argv[0]} <input.mbox> <max size with SI suffix>")
    sys.exit(1)

# Recognised size suffixes and the number of bytes each one stands for
# (binary "iB" units are powers of 1024, the others powers of 1000).
suffixes = {
    "KiB": pow(2, 10),
    "KB": pow(10, 3),
    "MiB": pow(2, 20),
    "MB": pow(10, 6),
    "GiB": pow(2, 30),
    "GB": pow(10, 9),
}

# Size limit for one output file, in bytes.  It stays 0 when the argument
# cannot be understood, which is reported just below.
max_length = 0
for suffix, multiplier in suffixes.items():
    if sys.argv[2].endswith(suffix):
        try:
            max_length = int(sys.argv[2][:-len(suffix)]) * multiplier
        except ValueError:
            # The part before the suffix is not an integer ("1.5MB", "MB",
            # "tenKB"): treat it like any other unparseable size instead of
            # dying with a traceback.
            max_length = 0
        break

# Zero and negative limits are rejected as well: they cannot hold a message.
if max_length <= 0:
    print(f"Don't understand max length of {sys.argv[2]}")
    sys.exit(1)
def factory(f):
    """Parse one message from the binary file-like object *f*.

    Handed to ``mailbox.mbox`` as its message factory: everything readable
    from *f* is parsed with ``email.policy.SMTP`` and the resulting message
    object is returned.
    """
    return email.message_from_bytes(f.read(), policy=email.policy.SMTP)


try:
    # create=False: a mistyped input path must be reported, not silently
    # turned into a brand-new empty mbox that then "splits" into nothing.
    input_mbox = mailbox.mbox(sys.argv[1], factory=factory, create=False)
except mailbox.NoSuchMailboxError:
    print(f"No such mbox file: {sys.argv[1]}")
    sys.exit(1)
# Number of output mbox files opened so far; doubles as the numeric suffix
# of the most recently opened one.
nth = 0


def gen_output():
    """Open the next numbered output mbox and return it.

    Increments the module-level ``nth`` counter, builds the output name from
    the base name of the input path (``sys.argv[1]``) followed by ``.<nth>``,
    announces that name on stdout and returns a ``mailbox.mbox`` opened on
    it.  Because only the base name is used, the path is relative to the
    current working directory.
    """
    global nth
    nth = nth + 1
    input_name = os.path.basename(sys.argv[1])
    path = f"{input_name}.{nth}"
    print(f"Writing {path}...")
    return mailbox.mbox(path)
# The first output file is opened lazily, on the first message, so that an
# empty input (or an oversized first message) never leaves an empty output
# file behind or inflates the file count in the summary.
output_mbox = None
# nbytes: serialized bytes written to the current output file so far.
# i: number of messages copied so far (drives the progress display).
nbytes = i = 0
nmsgs = len(input_mbox)
# itervalues() parses one message at a time; values() would first build a
# list holding every parsed message of the (potentially huge) input.
for msg in input_mbox.itervalues():
    # approximate: this is the length of the re-serialized message, not the
    # exact number of bytes the mbox writer will put on disk.
    msglen = len(msg.as_bytes())
    if output_mbox is None:
        output_mbox = gen_output()
    elif nbytes + msglen >= max_length:
        # Adding this message would reach the limit: finish the current file
        # and start the next one with this message.
        output_mbox.close()
        print()  # end the "\r"-overwritten progress line before the next "Writing ..."
        output_mbox = gen_output()
        nbytes = 0
    nbytes += msglen
    output_mbox.add(msg)
    i += 1
    # "\r" returns to the start of the line so the counter updates in place.
    sys.stdout.write(f"\r{i}/{nmsgs}")

if output_mbox is not None:
    output_mbox.close()
print(f"\nSplit {nmsgs} messages into {nth} mbox files.")