initial commit
All checks were successful
Deploy docs / build-and-deploy (push) Successful in 3s

This commit is contained in:
sid 2026-02-23 20:34:35 +01:00
commit 95a533c876
451 changed files with 18255 additions and 0 deletions

106
pkgs/marker-pdf/default.nix Normal file
View file

@ -0,0 +1,106 @@
{
lib,
python3,
fetchPypi,
fetchurl,
}:
let
fontFileName = "GoNotoCurrent-Regular.ttf";
fetchFont = fetchurl {
url = "https://models.datalab.to/artifacts/${fontFileName}";
hash = "sha256-iCr7q5ZWCMLSvGJ/2AFrliqlpr4tNY+d4kp7WWfFYy4=";
};
python = python3;
pdftext = import ./pdftext.nix { inherit lib python fetchPypi; };
surya-ocr = import ./surya-ocr.nix { inherit lib python fetchPypi; };
in
python.pkgs.buildPythonApplication rec {
pname = "marker-pdf";
version = "1.8.2";
pyproject = true;
src = fetchPypi {
pname = "marker_pdf";
inherit version;
hash = "sha256-k2mxOpBBtXdCzxP4hqfXnCEqUF69hQZWr/d9V/tITZ4=";
};
patches = [
./skip-font-download.patch
./fix-output-dir.patch
];
pythonRelaxDeps = [
"click"
"anthropic"
"markdownify"
"pillow"
];
pythonRemoveDeps = [
"pre-commit"
];
postInstall = ''
FONT_DEST_DIR="$out/lib/${python.libPrefix}/site-packages/static/fonts"
mkdir -p $FONT_DEST_DIR
cp ${fetchFont} "$FONT_DEST_DIR/${fontFileName}"
echo "Installed font to $FONT_DEST_DIR/${fontFileName}"
'';
build-system = [
python.pkgs.poetry-core
];
dependencies = [
pdftext
surya-ocr
]
++ (with python.pkgs; [
anthropic
click
filetype
ftfy
google-genai
markdown2
markdownify
openai
pillow
pydantic
pydantic-settings
python-dotenv
rapidfuzz
regex
scikit-learn
torch
tqdm
transformers
]);
optional-dependencies = with python.pkgs; {
full = [
ebooklib
mammoth
openpyxl
python-pptx
weasyprint
];
};
pythonImportsCheck = [
"marker"
];
meta = {
description = "Convert documents to markdown with high speed and accuracy";
homepage = "https://pypi.org/project/marker-pdf/";
license = lib.licenses.gpl3Only;
maintainers = with lib.maintainers; [ ];
};
}