2020-12-30 15:46:56 +01:00
|
|
|
import logging
|
|
|
|
|
import multiprocessing
|
|
|
|
|
import shutil
|
|
|
|
|
|
|
|
|
|
import tqdm
|
|
|
|
|
from django import db
|
|
|
|
|
from django.core.management.base import BaseCommand
|
2022-03-11 10:55:51 -08:00
|
|
|
|
2023-11-09 11:46:37 -08:00
|
|
|
from documents.management.commands.mixins import MultiProcessMixin
|
|
|
|
|
from documents.management.commands.mixins import ProgressBarMixin
|
2023-04-20 08:10:17 -07:00
|
|
|
from documents.models import Document
|
2023-03-28 09:39:30 -07:00
|
|
|
from documents.parsers import get_parser_class_for_mime_type
|
2020-12-30 15:46:56 +01:00
|
|
|
|
|
|
|
|
|
2023-04-30 17:55:25 -07:00
|
|
|
def _process_document(doc_id):
    """
    Regenerate the thumbnail of a single document.

    Fetches the document with the given id, asks the parser class returned
    for its mime type to produce a thumbnail, and moves the produced file to
    the document's thumbnail path.

    If no parser class is returned for the mime type, a message is printed
    and the function returns without touching the document.  Once a parser
    has been instantiated its ``cleanup()`` is always invoked, whether or not
    thumbnail generation succeeded.

    Takes a plain id rather than a model instance; the caller also hands this
    function to ``multiprocessing.Pool.imap_unordered``.
    """
    doc: Document = Document.objects.get(id=doc_id)

    parser_cls = get_parser_class_for_mime_type(doc.mime_type)
    if not parser_cls:
        # Guard clause: nothing can be rendered without a parser.
        print(f"{doc} No parser for mime type {doc.mime_type}")
        return

    doc_parser = parser_cls(logging_group=None)
    try:
        generated_thumb = doc_parser.get_thumbnail(
            doc.source_path,
            doc.mime_type,
            doc.get_public_filename(),
        )
        # Put the freshly generated file where the document expects it.
        shutil.move(generated_thumb, doc.thumbnail_path)
    finally:
        doc_parser.cleanup()
|
|
|
|
|
|
2020-12-30 17:20:03 +01:00
|
|
|
|
2023-11-09 11:46:37 -08:00
|
|
|
class Command(MultiProcessMixin, ProgressBarMixin, BaseCommand):
    """
    Management command that regenerates document thumbnails.

    By default every document is processed; ``-d/--document`` restricts the
    run to a single document id.  The progress-bar and process-count options
    are contributed by the two mixins.
    """

    help = "This will regenerate the thumbnails for all documents."

    def add_arguments(self, parser):
        """Register ``-d/--document`` and the options supplied by the mixins."""
        parser.add_argument(
            "-d",
            "--document",
            default=None,
            type=int,
            required=False,
            help=(
                "Specify the ID of a document, and this command will only "
                "run on this specific document."
            ),
        )
        self.add_argument_progress_bar_mixin(parser)
        self.add_argument_processes_mixin(parser)

    def handle(self, *args, **options):
        """
        Regenerate thumbnails for the selected documents.

        With a process count of 1 the documents are handled sequentially in
        this process (no progress bar is drawn in that mode); otherwise the
        ids are distributed over a ``multiprocessing.Pool`` and progress is
        reported through tqdm unless the progress bar is disabled.
        """
        # Raise the threshold of the first root handler so only errors are
        # emitted.  The root logger may have no handlers configured at all,
        # in which case there is nothing to adjust (and indexing [0] would
        # raise IndexError).
        root_handlers = logging.getLogger().handlers
        if root_handlers:
            root_handlers[0].setLevel(logging.ERROR)

        # NOTE(review): these presumably populate self.process_count and
        # self.no_progress_bar, which are read below - confirm in the mixins.
        self.handle_processes_mixin(**options)
        self.handle_progress_bar_mixin(**options)

        # Compare against None explicitly: a plain truthiness test would
        # treat "-d 0" as "no filter" and regenerate every thumbnail.
        document_id = options["document"]
        if document_id is not None:
            documents = Document.objects.filter(pk=document_id)
        else:
            documents = Document.objects.all()

        # Only the ids are needed, so avoid loading full model instances.
        ids = list(documents.values_list("id", flat=True))

        # Note to future self: this prevents django from reusing database
        # connections between processes, which is bad and does not work
        # with postgres.
        db.connections.close_all()

        if self.process_count == 1:
            for doc_id in ids:
                _process_document(doc_id)
        else:  # pragma: no cover
            with multiprocessing.Pool(processes=self.process_count) as pool:
                # list() drains the lazy iterator so every task actually runs
                # before the pool is torn down.
                list(
                    tqdm.tqdm(
                        pool.imap_unordered(_process_document, ids),
                        total=len(ids),
                        disable=self.no_progress_bar,
                    ),
                )
|