Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

PDF/A docs #35

Merged
merged 5 commits into from
Sep 4, 2024
Merged
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Prev Previous commit
Next Next commit
Remove all checked operations
laurmaedje committed Sep 4, 2024
commit a8a0e59f82a38cb39e9ff28ac965e05cd2901fd1
16 changes: 0 additions & 16 deletions src/color.rs
Original file line number Diff line number Diff line change
@@ -354,7 +354,6 @@ impl ColorSpace<'_> {

/// Device color spaces.
///
///
/// Please note that the use of the device color spaces is restricted by several
/// PDF standards such as PDF/A, PDF/X, et cetera. Their appearance will be
/// governed by any applicable [output intent](crate::writers::OutputIntent) and
@@ -632,21 +631,6 @@ impl<'a> DeviceN<'a> {

DeviceNAttrs::start(self.array.push())
}

/// Close the `DeviceN` color space array after checking a few provisions of
/// PDF/A-2 clauses 6.2.4.4 and 6.1.13.
///
/// # Errors
///
/// - `PdfaError::TooManyColorants` if the array length exceeds 8.
/// - `PdfaError::MalformedDeviceNArray` if either the `has_alternate` or the
///   `has_tint` flag is unset.
pub fn finish_pdfa(self) -> PdfaResult<()> {
    let len = self.array.len();
    if len > 8 {
        return Err(PdfaError::TooManyColorants(len as usize));
    }

    // Both flags must be set before the array may be finished.
    let complete = self.has_alternate && self.has_tint;
    if !complete {
        return Err(PdfaError::MalformedDeviceNArray);
    }

    self.finish();
    Ok(())
}
}

/// Writer for a _DeviceN attributes dictionary_. PDF 1.6+.
20 changes: 1 addition & 19 deletions src/content.rs
Original file line number Diff line number Diff line change
@@ -3,7 +3,6 @@ use super::*;
/// A builder for a content stream.
pub struct Content {
/// Byte buffer backing the builder; allocated in `with_capacity`.
buf: Vec<u8>,
/// Depth counter for the graphics state stack: raised by `save_state` and
/// lowered by `restore_state` (both saturating), and consulted by
/// `save_state_checked` to enforce the nesting limit.
q_nesting: usize,
}

/// Core methods.
@@ -17,7 +16,7 @@ impl Content {

/// Create a new content stream with the specified initial buffer capacity.
pub fn with_capacity(capacity: usize) -> Self {
Self { buf: Vec::with_capacity(capacity), q_nesting: 0 }
Self { buf: Vec::with_capacity(capacity) }
}

/// Start writing an arbitrary operation.
@@ -244,30 +243,13 @@ impl Content {
#[inline]
pub fn save_state(&mut self) -> &mut Self {
    self.op("q");

    // A saturating increment suffices: had the nesting been checked, an
    // error would have been reported long before the counter could overflow.
    let deeper = self.q_nesting.saturating_add(1);
    self.q_nesting = deeper;
    self
}

/// `q`: Save the graphics state on the stack, refusing to go past the
/// nesting limit of PDF/A-2 clause 6.1.13.
///
/// # Errors
///
/// Returns `PdfaError::OverlyNestedGraphicsState` if the tracked nesting
/// depth is already 28 or more; nothing is written in that case.
#[inline]
pub fn save_state_checked(&mut self) -> PdfaResult<&mut Self> {
    if self.q_nesting < 28 {
        Ok(self.save_state())
    } else {
        Err(PdfaError::OverlyNestedGraphicsState)
    }
}

/// `Q`: Restore the graphics state from the stack.
///
/// Also lowers the tracked nesting depth by one.
#[inline]
pub fn restore_state(&mut self) -> &mut Self {
    self.op("Q");

    // Saturating, so the counter stays at zero instead of underflowing.
    let shallower = self.q_nesting.saturating_sub(1);
    self.q_nesting = shallower;
    self
}

33 changes: 1 addition & 32 deletions src/font.rs
Original file line number Diff line number Diff line change
@@ -605,7 +605,7 @@ impl FontDescriptor<'_> {
}

/// Write the `/CIDSet` attribute.
///
///
/// If present in PDF/A, this must include all characters in the subset,
/// even if they are not used in the document.
pub fn cid_set(&mut self, id: Ref) -> &mut Self {
@@ -939,46 +939,17 @@ where
self.pair_with_multiple(glyph, [codepoint]);
}

/// Map a glyph ID to a single codepoint, rejecting codepoints that are
/// invalid in some PDF/A profiles.
///
/// Forwards to `pair_with_multiple_pdfa` with a one-element array and
/// returns whatever it returns.
pub fn pair_pdfa(&mut self, glyph: G, codepoint: char) -> PdfaResult<()> {
    let single = [codepoint];
    self.pair_with_multiple_pdfa(glyph, single)
}

/// Add a mapping from a glyph ID to multiple codepoints.
///
/// Runs the shared implementation with the PDF/A codepoint check disabled.
///
/// # Panics
///
/// Panics if the shared implementation returns an error. The only error
/// path visible here is gated on the disabled check, so this is presumably
/// unreachable.
pub fn pair_with_multiple(
    &mut self,
    glyph: G,
    codepoints: impl IntoIterator<Item = char>,
) {
    let check_pdfa = false;
    let outcome = self.pair_with_multiple_impl(glyph, codepoints, check_pdfa);
    outcome.unwrap();
}

/// Add a mapping from a glyph ID to multiple codepoints, rejecting
/// codepoints that are invalid in some PDF/A profiles.
///
/// Runs the shared implementation with the PDF/A codepoint check enabled
/// and passes its result through unchanged.
pub fn pair_with_multiple_pdfa(
    &mut self,
    glyph: G,
    codepoints: impl IntoIterator<Item = char>,
) -> PdfaResult<()> {
    let check_pdfa = true;
    self.pair_with_multiple_impl(glyph, codepoints, check_pdfa)
}

fn pair_with_multiple_impl(
&mut self,
glyph: G,
codepoints: impl IntoIterator<Item = char>,
check_pdfa: bool,
) -> PdfaResult<()> {
self.mappings.push(b'<');
glyph.push(&mut self.mappings);
self.mappings.extend(b"> <");

for c in codepoints {
if check_pdfa && (c == '\u{0}' || c == '\u{feff}' || c == '\u{fffe}') {
return Err(PdfaError::InvalidCMapCodepoint);
}

for &mut part in c.encode_utf16(&mut [0; 2]) {
self.mappings.push_hex_u16(part);
}
@@ -991,8 +962,6 @@ where
if self.count >= 100 {
self.flush_range();
}

Ok(())
}

/// Finish building the character map.
3 changes: 1 addition & 2 deletions src/forms.rs
Original file line number Diff line number Diff line change
@@ -1,6 +1,5 @@
use crate::types::AnnotationType;

use super::*;
use crate::types::AnnotationType;

/// Writer for an _interactive forms dictionary_. PDF 1.2+.
///
23 changes: 2 additions & 21 deletions src/lib.rs
Original file line number Diff line number Diff line change
@@ -177,15 +177,13 @@ pub mod types {
pub use object::Predictor;
pub use renditions::{MediaClipType, RenditionType, TempFileType};
pub use structure::{
Direction, NumberingStyle, OutlineItemFlags, PageLayout, PageMode, PdfaError,
PdfaResult, StructRole, TabOrder, TrappingStatus,
Direction, NumberingStyle, OutlineItemFlags, PageLayout, PageMode, StructRole,
TabOrder, TrappingStatus,
};
pub use transitions::{TransitionAngle, TransitionStyle};
pub use xobject::SMaskInData;
}

use structure::{PdfaError, PdfaResult};

pub use self::chunk::Chunk;
pub use self::content::Content;
pub use self::object::{
@@ -278,23 +276,6 @@ impl Pdf {
self.indirect(id).start()
}

/// Write the cross-reference table and file trailer and return the
/// underlying buffer, after verifying two PDF/A requirements: the number of
/// indirect objects and the presence of a file ID.
///
/// # Errors
///
/// - `PdfaError::TooManyIndirectObjects` if more than 8388607 offsets were
///   recorded.
/// - `PdfaError::MissingFileID` if no file ID was set.
///
/// # Panics
///
/// Panics if any indirect reference id was used twice.
pub fn finish_pdfa(self) -> PdfaResult<Vec<u8>> {
    let object_count = self.chunk.offsets.len();
    if object_count > 8388607 {
        return Err(PdfaError::TooManyIndirectObjects(object_count));
    }

    let has_file_id = self.file_id.is_some();
    if !has_file_id {
        return Err(PdfaError::MissingFileID);
    }

    Ok(self.finish())
}

/// Write the cross-reference table and file trailer and return the
/// underlying buffer.
///
49 changes: 1 addition & 48 deletions src/object.rs
Original file line number Diff line number Diff line change
@@ -3,8 +3,6 @@ use std::marker::PhantomData;
use std::mem::ManuallyDrop;
use std::num::NonZeroI32;

use structure::{PdfaError, PdfaResult};

use super::*;

/// A primitive PDF object.
@@ -54,18 +52,7 @@ impl Primitive for f32 {
#[derive(Debug, Copy, Clone, Eq, PartialEq, Ord, PartialOrd, Hash)]
pub struct Str<'a>(pub &'a [u8]);

impl<'a> Str<'a> {
/// Build a string from `bytes`, verifying that it is at most 32767 bytes
/// long.
///
/// This helps to ensure compliance with Section 6.1.8 in the PDF/A-2 spec.
///
/// # Errors
///
/// Returns `PdfaError::OverlongString` carrying the actual length when the
/// limit is exceeded.
pub fn pdfa(bytes: &'a [u8]) -> PdfaResult<Self> {
    match bytes.len() {
        len if len > 32767 => Err(PdfaError::OverlongString(len)),
        _ => Ok(Self(bytes)),
    }
}

impl Str<'_> {
/// Whether the parentheses in the byte string are balanced.
fn is_balanced(self) -> bool {
let mut depth = 0;
@@ -162,22 +149,6 @@ impl Primitive for TextStr<'_> {
#[derive(Debug, Copy, Clone, Eq, PartialEq, Ord, PartialOrd, Hash)]
pub struct Name<'a>(pub &'a [u8]);

impl<'a> Name<'a> {
    /// Build a name from a byte string, verifying that it is at most 127
    /// bytes long and valid UTF-8.
    ///
    /// This helps to ensure compliance with Section 6.1.8 in the PDF/A
    /// specifications PDF/A-2, PDF/A-3, and PDF/A-4.
    ///
    /// # Errors
    ///
    /// - `PdfaError::OverlongName` (with the actual length) if `bytes` is
    ///   longer than 127 bytes. The length is checked first.
    /// - `PdfaError::NameNotUtf8` (with the decoding error) if `bytes` is not
    ///   valid UTF-8.
    pub fn pdfa(bytes: &'a [u8]) -> PdfaResult<Self> {
        let len = bytes.len();
        if len > 127 {
            return Err(PdfaError::OverlongName(len));
        }

        match std::str::from_utf8(bytes) {
            Ok(_) => Ok(Self(bytes)),
            Err(err) => Err(PdfaError::NameNotUtf8(err)),
        }
    }
}

impl Primitive for Name<'_> {
fn write(self, buf: &mut Vec<u8>) {
buf.reserve(1 + self.0.len());
@@ -300,24 +271,6 @@ impl Rect {
Self { x1, y1, x2, y2 }
}

/// Build a rectangle whose dimensions respect the implementation limits for
/// page sizes.
///
/// Width and height are the absolute differences of the respective
/// coordinates; each must lie within `3.0..=14400.0`.
///
/// # Errors
///
/// - `PdfaError::PageWidthOutOfRange` if the width is outside the range.
///   The width is checked first.
/// - `PdfaError::PageHeightOutOfRange` if the height is outside the range.
#[inline]
pub fn page(x1: f32, y1: f32, x2: f32, y2: f32) -> PdfaResult<Self> {
    let allowed = 3.0..=14400.0;

    let width = (x2 - x1).abs();
    if !allowed.contains(&width) {
        return Err(PdfaError::PageWidthOutOfRange(width));
    }

    let height = (y2 - y1).abs();
    if !allowed.contains(&height) {
        return Err(PdfaError::PageHeightOutOfRange(height));
    }

    Ok(Self { x1, y1, x2, y2 })
}

/// Convert this rectangle into 8 floats describing the four corners of the
/// rectangle in counterclockwise order.
#[inline]
93 changes: 1 addition & 92 deletions src/structure.rs
Original file line number Diff line number Diff line change
@@ -1,8 +1,5 @@
use std::str::Utf8Error;

use crate::color::SeparationInfo;

use super::*;
use crate::color::SeparationInfo;

/// Writer for a _document catalog dictionary_.
///
@@ -1572,91 +1569,3 @@ impl<'a> Metadata<'a> {
}

deref!('a, Metadata<'a> => Stream<'a>, stream);

/// A result type for operations that check for PDF/A compliance.
pub type PdfaResult<T> = Result<T, PdfaError>;

/// Errors that pdf-writer can automatically detect when writing PDF/A files.
///
/// Please note that these errors only enforce provisions of clauses 6.1.8,
/// 6.1.13, 6.1.3, and 6.2.11.7.2 of the PDF/A-2 spec. They do not enforce the
/// entire spec, so additional attention needs to be paid to write compliant
/// files.
///
/// Integer and float implementation limits are not checked since they are
/// already enforced by the `i32` and `f32` types, respectively.
///
/// The type implements [`std::error::Error`], so it can be propagated with
/// `?` into `Box<dyn Error>` and similar error containers.
#[derive(Debug, Clone, PartialEq)]
pub enum PdfaError {
    /// A string contained more than 32767 bytes.
    OverlongString(usize),
    /// A name object contained more than 127 bytes.
    OverlongName(usize),
    /// A name object was not UTF-8 decodable.
    NameNotUtf8(Utf8Error),
    /// The file has more than 8388607 indirect objects.
    TooManyIndirectObjects(usize),
    /// The graphics state was nested more than 28 levels deep.
    OverlyNestedGraphicsState,
    /// A DeviceN color space had more than 8 colorants.
    TooManyColorants(usize),
    /// The DeviceN array does not comply with clause 8.6.6.5 of ISO
    /// 32000-1:2008.
    MalformedDeviceNArray,
    /// The file trailer is missing a file ID.
    ///
    /// Call [`crate::Pdf::set_file_id`] before finishing the file.
    MissingFileID,
    /// The CMap maps to a codepoint 0, U+FFFE, or U+FEFF.
    ///
    /// Only applicable to PDF/A-2u, PDF/A-2a, and similar profiles in other
    /// parts.
    InvalidCMapCodepoint,
    /// The page width is out of range.
    PageWidthOutOfRange(f32),
    /// The page height is out of range.
    PageHeightOutOfRange(f32),
}

impl std::fmt::Display for PdfaError {
    /// Writes a human-readable description of the violated limit.
    fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result {
        match self {
            Self::OverlongString(len) => {
                write!(f, "string contained {} bytes but must not exceed 32767", len)
            }
            Self::OverlongName(len) => {
                write!(f, "name contained {} bytes but must not exceed 127", len)
            }
            Self::NameNotUtf8(e) => write!(f, "name was not UTF-8 decodable ({})", e),
            Self::TooManyIndirectObjects(count) => write!(
                f,
                "file has {} indirect objects but must not exceed 8388607",
                count
            ),
            Self::OverlyNestedGraphicsState => {
                f.write_str("graphics state (q) was nested more than 28 levels deep")
            }
            Self::TooManyColorants(count) => write!(
                f,
                "DeviceN color space had {} colorants but must not exceed 8",
                count
            ),
            Self::MalformedDeviceNArray => f.write_str("DeviceN array is malformed"),
            Self::MissingFileID => f.write_str("file trailer is missing a file ID"),
            Self::InvalidCMapCodepoint => {
                f.write_str("CMap maps to a forbidden codepoint")
            }
            Self::PageWidthOutOfRange(w) if *w < 3.0 => {
                write!(f, "page width {} is too small (must be at least 3)", w)
            }
            Self::PageWidthOutOfRange(w) if *w > 14400.0 => {
                write!(f, "page width {} is too large (must be at most 14400)", w)
            }
            // Neither comparison holds for NaN, which is neither too small
            // nor too large; describe it as what it is.
            Self::PageWidthOutOfRange(w) => {
                write!(f, "page width {} is not a valid number", w)
            }
            Self::PageHeightOutOfRange(h) if *h < 3.0 => {
                write!(f, "page height {} is too small (must be at least 3)", h)
            }
            Self::PageHeightOutOfRange(h) if *h > 14400.0 => {
                write!(f, "page height {} is too large (must be at most 14400)", h)
            }
            // See the width case above: only NaN reaches this arm.
            Self::PageHeightOutOfRange(h) => {
                write!(f, "page height {} is not a valid number", h)
            }
        }
    }
}

impl std::error::Error for PdfaError {
    /// Exposes the underlying UTF-8 decoding error for `NameNotUtf8`; all
    /// other variants have no underlying cause.
    fn source(&self) -> Option<&(dyn std::error::Error + 'static)> {
        match self {
            Self::NameNotUtf8(e) => Some(e),
            _ => None,
        }
    }
}