# PDF 处理 (14_Pdf)
最后更新: 2024-09-20
# 📚 概述
DarkM 框架的 PDF 模块提供了统一的 PDF 文件处理和导出功能。支持多种 PDF 库(iTextSharp、PDFSharp),并提供 Excel 导出为 PDF 的能力。
源代码位置: DarkM/src/Framework/Pdf
# 🏗️ 模块架构
# 项目结构
Pdf/
├── Pdf.Abstractions/ # 抽象层(接口、配置)
│ ├── IPdfHandler.cs # PDF 处理接口
│ ├── IPdfExportHandler.cs # PDF 导出接口
│ ├── PdfProvider.cs # PDF 提供器枚举
│ ├── PdfConfig.cs # PDF 配置类
│ └── ExportModel.cs # 导出模型
│
├── Pdf.Integration/ # 集成层(DI 注册)
│ └── ServiceCollectionExtensions.cs # DI 扩展
│
├── Pdf.iText/ # iTextSharp 实现
│ ├── PdfHandler.cs # PDF 处理实现
│ ├── iTextHelper.cs # iText 帮助类
│ ├── ITextSharpExcelExportHandler.cs # Excel 导出实现
│ └── Pdf/ # PDF 工具类
│ ├── PdfUtils.cs
│ ├── PdfReportDataReaderDataSource.cs
│ └── ExcelHelperAbstract.cs
│
├── Pdf.PDFSharp/ # PDFSharp 实现
│ ├── DocHelper.cs # 文档帮助类
│ └── IFileConvertHelper.cs # 文件转换帮助类
│
└── Pdf.iText.Excel/ # Excel 导出扩展
└── Pdf/
├── PdfReportHandler.cs # PDF 报表处理器
├── ITextSharpExcelHandler.cs
└── NPOIExtend.cs # NPOI 扩展
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
# 🔧 核心接口
# IPdfHandler(PDF 处理)
/// <summary>
/// PDF handler: read-only queries against an existing PDF file.
/// </summary>
public interface IPdfHandler
{
    /// <summary>
    /// Gets the number of pages in the specified PDF file.
    /// </summary>
    /// <param name="filePath">Path of the PDF file to inspect.</param>
    /// <returns>The page count of the file.</returns>
    int GetPages(string filePath);
    /// <summary>
    /// Gets the text content of the specified PDF file.
    /// </summary>
    /// <param name="filePath">Path of the PDF file to read.</param>
    /// <returns>The text content of the file.</returns>
    string GetFullText(string filePath);
}
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
2
3
4
5
6
7
8
9
10
11
12
13
14
15
功能说明:
- GetPages() - 获取 PDF 文件页数
- GetFullText() - 提取 PDF 全文文本
# IPdfExportHandler(PDF 导出)
/// <summary>
/// PDF export handler: renders export data into a PDF document.
/// Every overload exists in two flavours, one writing to a
/// <see cref="Stream"/> and one writing to a <see cref="FileInfo"/>.
/// </summary>
public interface IPdfExportHandler
{
    /// <summary>
    /// Creates the exported PDF from a strongly typed entity list.
    /// </summary>
    /// <typeparam name="T">Entity type of the exported rows.</typeparam>
    /// <param name="model">Export configuration.</param>
    /// <param name="entities">Rows to export.</param>
    /// <param name="stream">Stream passed to the handler as the output target.</param>
    void CreatePdf<T>(ExportModel model, IList<T> entities, Stream stream)
        where T : class, new();
    /// <summary>
    /// Creates the exported PDF from several sheets.
    /// </summary>
    /// <param name="exportExcelSheets">Sheets to export.</param>
    /// <param name="stream">Stream passed to the handler as the output target.</param>
    void CreatePdf(IList<ExportExcelSheet> exportExcelSheets, Stream stream);
    /// <summary>
    /// Creates the exported PDF from a strongly typed entity list and saves it to a file.
    /// </summary>
    /// <typeparam name="T">Entity type of the exported rows.</typeparam>
    /// <param name="model">Export configuration.</param>
    /// <param name="entities">Rows to export.</param>
    /// <param name="fileInfo">File the PDF is saved to.</param>
    void CreatePdf<T>(ExportModel model, IList<T> entities, FileInfo fileInfo)
        where T : class, new();
    /// <summary>
    /// Creates the exported PDF from several sheets and saves it to a file.
    /// </summary>
    /// <param name="exportExcelSheets">Sheets to export.</param>
    /// <param name="fileInfo">File the PDF is saved to.</param>
    void CreatePdf(IList<ExportExcelSheet> exportExcelSheets, FileInfo fileInfo);
}
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
功能说明:
- 支持泛型导出(强类型实体)
- 支持多 Sheet 导出
- 支持 Stream 和 FileInfo 两种输出方式
# PdfProvider(PDF 提供器)
/// <summary>
/// Identifies which PDF implementation the module should load.
/// The member name is read from configuration (see <c>PdfConfig.Provider</c>).
/// </summary>
[Description("PDF 提供器")]
public enum PdfProvider
{
    /// <summary>
    /// iTextSharp implementation.
    /// </summary>
    iText
}
1
2
3
4
5
6
7
8
2
3
4
5
6
7
8
# PdfConfig(PDF 配置)
/// <summary>
/// Configuration of the PDF module.
/// </summary>
public class PdfConfig
{
    /// <summary>
    /// PDF provider (implementation) to use.
    /// </summary>
    public PdfProvider Provider { get; set; }
    /// <summary>
    /// Root path under which temporary files produced during processing are stored.
    /// </summary>
    public string TempPath { get; set; }
}
1
2
3
4
5
6
7
8
9
10
11
12
2
3
4
5
6
7
8
9
10
11
12
# 🏗️ 依赖注入
# 自动注册机制
// Pdf.Integration/ServiceCollectionExtensions.cs
/// <summary>
/// Dependency-injection registration for the PDF module.
/// </summary>
public static class ServiceCollectionExtensions
{
    /// <summary>
    /// Adds the PDF feature: binds the "Pdf" configuration section to a
    /// <see cref="PdfConfig"/> singleton, loads the provider assembly selected by
    /// <see cref="PdfConfig.Provider"/> and registers the first concrete
    /// <see cref="IPdfHandler"/> and <see cref="IPdfExportHandler"/> implementations
    /// found in it as singletons.
    /// </summary>
    /// <param name="services">Service collection to register into.</param>
    /// <param name="cfg">Application configuration containing the "Pdf" section.</param>
    /// <returns>
    /// The same <paramref name="services"/> instance. When no provider assembly can be
    /// loaded only <see cref="PdfConfig"/> is registered.
    /// </returns>
    public static IServiceCollection AddPdf(
        this IServiceCollection services,
        IConfiguration cfg)
    {
        // IConfiguration.GetSection never returns null (a missing section yields an
        // empty one), so the section can be bound unconditionally.
        var config = new PdfConfig();
        cfg.GetSection("Pdf").Bind(config);
        services.AddSingleton(config);

        // Load the implementation assembly that matches the configured provider.
        // NOTE(review): presumably matched by assembly-name suffix - confirm in AssemblyHelper.
        var assembly = AssemblyHelper.LoadByNameEndString(
            $".Lib.Pdf.{config.Provider}");
        if (assembly == null)
        {
            return services;
        }

        // Scan the assembly once. Only concrete classes are candidates: an abstract
        // base class or an interface would satisfy IsAssignableFrom but cannot be
        // instantiated by the container.
        var candidates = assembly.GetTypes()
            .Where(t => t.IsClass && !t.IsAbstract)
            .ToArray();

        RegisterFirstImplementation(services, typeof(IPdfHandler), candidates);
        RegisterFirstImplementation(services, typeof(IPdfExportHandler), candidates);
        return services;
    }

    /// <summary>
    /// Registers the first candidate assignable to <paramref name="serviceType"/> as a
    /// singleton; does nothing when no candidate matches.
    /// </summary>
    private static void RegisterFirstImplementation(
        IServiceCollection services,
        System.Type serviceType,
        System.Type[] candidates)
    {
        var implementation = candidates.FirstOrDefault(t => serviceType.IsAssignableFrom(t));
        if (implementation != null)
        {
            services.AddSingleton(serviceType, implementation);
        }
    }
}
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
# 💡 使用示例
# 1. 配置 PDF 模块
// appsettings.json
{
"Pdf": {
"Provider": "iText",
"TempPath": "/temp/pdf"
}
}
1
2
3
4
5
6
7
2
3
4
5
6
7
# 2. 注册服务
// Program.cs or Startup.cs
var builder = WebApplication.CreateBuilder(args);
// Add the PDF services, passing the application configuration
builder.Services.AddPdf(builder.Configuration);
var app = builder.Build();
1
2
3
4
5
6
7
2
3
4
5
6
7
# 3. 获取 PDF 信息
/// <summary>
/// Collects basic information (page count, text, size) about a PDF file.
/// </summary>
public class PdfInfoService
{
    private readonly IPdfHandler _pdfHandler;
    private readonly ILogger<PdfInfoService> _logger;

    /// <summary>
    /// Creates the service.
    /// </summary>
    /// <param name="pdfHandler">Handler used to read the page count and text.</param>
    /// <param name="logger">Logger for this service.</param>
    public PdfInfoService(IPdfHandler pdfHandler, ILogger<PdfInfoService> logger)
    {
        _pdfHandler = pdfHandler;
        _logger = logger;
    }

    /// <summary>
    /// Reads page count, full text and file size of the given PDF file.
    /// </summary>
    /// <param name="filePath">Path of the PDF file.</param>
    /// <returns>A completed task holding the collected information.</returns>
    /// <exception cref="System.ArgumentException">
    /// <paramref name="filePath"/> is null, empty or whitespace.
    /// </exception>
    /// <remarks>
    /// All work here is synchronous, so the method is deliberately not marked
    /// <c>async</c> (an async method without await only adds a state machine and
    /// compiler warning CS1998). The Task-returning signature is kept for callers.
    /// </remarks>
    public Task<PdfInfoModel> GetPdfInfo(string filePath)
    {
        if (string.IsNullOrWhiteSpace(filePath))
        {
            throw new System.ArgumentException("A PDF file path is required.", nameof(filePath));
        }

        var info = new PdfInfoModel
        {
            FilePath = filePath,
            PageCount = _pdfHandler.GetPages(filePath),
            TextContent = _pdfHandler.GetFullText(filePath),
            FileSize = new FileInfo(filePath).Length
        };
        return Task.FromResult(info);
    }
}
/// <summary>
/// Result model returned by <c>PdfInfoService.GetPdfInfo</c>.
/// </summary>
public class PdfInfoModel
{
    /// <summary>Path of the inspected PDF file.</summary>
    public string FilePath { get; set; }
    /// <summary>Number of pages in the file.</summary>
    public int PageCount { get; set; }
    /// <summary>Text content extracted from the file.</summary>
    public string TextContent { get; set; }
    /// <summary>File size as reported by <c>FileInfo.Length</c> (bytes).</summary>
    public long FileSize { get; set; }
}
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
# 4. 导出 PDF 报表
/// <summary>
/// Sample controller that exports sales data as PDF reports.
/// </summary>
public class ReportController : ModuleController
{
    private readonly IPdfExportHandler _pdfExportHandler;
    private readonly IWebHostEnvironment _env;
    // The original sample called _salesService without declaring or injecting it,
    // which does not compile. ISalesService stands for the application's own sales
    // query service - replace it with the real type name.
    private readonly ISalesService _salesService;

    /// <summary>
    /// Creates the controller.
    /// </summary>
    /// <param name="pdfExportHandler">Handler that renders the PDF output.</param>
    /// <param name="env">Hosting environment.</param>
    /// <param name="salesService">Service that supplies the report data.</param>
    public ReportController(
        IPdfExportHandler pdfExportHandler,
        IWebHostEnvironment env,
        ISalesService salesService)
    {
        _pdfExportHandler = pdfExportHandler;
        _env = env;
        _salesService = salesService;
    }

    /// <summary>
    /// Exports the sales report as a PDF download.
    /// </summary>
    /// <param name="query">Filter used to query the sales data.</param>
    /// <returns>The generated PDF as a file result.</returns>
    [HttpPost("export-sales-report")]
    public IActionResult ExportSalesReport([FromBody] SalesReportQueryModel query)
    {
        // Query the data
        var salesData = _salesService.Query(query);

        // Configure the export model
        var exportModel = new ExportModel
        {
            Title = "销售报表",
            Author = "系统管理员",
            Subject = "月度销售统计",
            PageSize = PageSize.A4,
            Orientation = Orientation.Landscape,
            Columns = new List<ExportColumn>
            {
                new ExportColumn { Name = "订单号", Field = "OrderNo", Width = 100 },
                new ExportColumn { Name = "客户名称", Field = "CustomerName", Width = 150 },
                new ExportColumn { Name = "商品名称", Field = "ProductName", Width = 200 },
                new ExportColumn { Name = "数量", Field = "Quantity", Width = 80 },
                new ExportColumn { Name = "单价", Field = "UnitPrice", Width = 80 },
                new ExportColumn { Name = "金额", Field = "TotalAmount", Width = 100 },
                new ExportColumn { Name = "下单日期", Field = "OrderDate", Width = 100 }
            },
            FooterText = "第 {page} 页 / 共 {totalpages} 页"
        };

        // Export into an in-memory stream. It is not disposed here because it is
        // handed to File(...) as the response body.
        var stream = new MemoryStream();
        _pdfExportHandler.CreatePdf(exportModel, salesData, stream);
        // Rewind so the response is read from the start of the PDF
        stream.Position = 0;

        // Return the file
        return File(stream, "application/pdf", $"销售报表_{DateTime.Now:yyyyMMddHHmmss}.pdf");
    }

    /// <summary>
    /// Exports a report made of several sheets as one PDF download.
    /// </summary>
    /// <param name="query">Filter used to query the data of every sheet.</param>
    /// <returns>The generated PDF as a file result.</returns>
    [HttpPost("export-multi-sheet-report")]
    public IActionResult ExportMultiSheetReport([FromBody] MultiSheetReportQueryModel query)
    {
        var sheets = new List<ExportExcelSheet>
        {
            new ExportExcelSheet
            {
                SheetName = "销售汇总",
                Model = new ExportModel { Title = "销售汇总报表" },
                DataType = typeof(SalesSummaryModel),
                Data = _salesService.GetSummary(query)
            },
            new ExportExcelSheet
            {
                SheetName = "销售明细",
                Model = new ExportModel { Title = "销售明细报表" },
                DataType = typeof(SalesDetailModel),
                Data = _salesService.GetDetails(query)
            },
            new ExportExcelSheet
            {
                SheetName = "客户统计",
                Model = new ExportModel { Title = "客户统计报表" },
                DataType = typeof(CustomerStatModel),
                Data = _salesService.GetCustomerStats(query)
            }
        };

        var stream = new MemoryStream();
        _pdfExportHandler.CreatePdf(sheets, stream);
        // Rewind so the response is read from the start of the PDF
        stream.Position = 0;
        return File(stream, "application/pdf", $"综合报表_{DateTime.Now:yyyyMMddHHmmss}.pdf");
    }
}
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
# 5. 导出模型配置
// ExportModel.cs
/// <summary>
/// Export configuration: document metadata, page setup, columns and styling.
/// </summary>
public class ExportModel
{
    /// <summary>
    /// Report title.
    /// </summary>
    public string Title { get; set; }
    /// <summary>
    /// Author.
    /// </summary>
    public string Author { get; set; }
    /// <summary>
    /// Subject.
    /// </summary>
    public string Subject { get; set; }
    /// <summary>
    /// Paper size; defaults to <c>PageSize.A4</c>.
    /// </summary>
    public PageSize PageSize { get; set; } = PageSize.A4;
    /// <summary>
    /// Page orientation; defaults to <c>Orientation.Portrait</c>.
    /// </summary>
    public Orientation Orientation { get; set; } = Orientation.Portrait;
    /// <summary>
    /// Column configuration. Has no initializer, so it is null until assigned.
    /// </summary>
    public List<ExportColumn> Columns { get; set; }
    /// <summary>
    /// Footer text.
    /// </summary>
    public string FooterText { get; set; }
    /// <summary>
    /// Whether grid lines are shown; defaults to true.
    /// </summary>
    public bool ShowGridLines { get; set; } = true;
    /// <summary>
    /// Header background colour; defaults to "#4CAF50".
    /// </summary>
    public string HeaderBackgroundColor { get; set; } = "#4CAF50";
    /// <summary>
    /// Header text colour; defaults to "#FFFFFF".
    /// </summary>
    public string HeaderTextColor { get; set; } = "#FFFFFF";
}
// ExportColumn.cs
/// <summary>
/// Configuration of a single exported column.
/// </summary>
public class ExportColumn
{
    /// <summary>
    /// Column name (as displayed).
    /// </summary>
    public string Name { get; set; }
    /// <summary>
    /// Field name (property of the entity).
    /// </summary>
    public string Field { get; set; }
    /// <summary>
    /// Column width; defaults to 100.
    /// </summary>
    public float Width { get; set; } = 100;
    /// <summary>
    /// Alignment; defaults to <c>Alignment.Left</c>.
    /// </summary>
    public Alignment Alignment { get; set; } = Alignment.Left;
    /// <summary>
    /// Format string.
    /// </summary>
    public string Format { get; set; }
    /// <summary>
    /// Whether text wraps automatically; defaults to false.
    /// </summary>
    public bool WrapText { get; set; } = false;
}
// ExportExcelSheet.cs
/// <summary>
/// One sheet of a multi-sheet export: its name, export configuration and data.
/// </summary>
public class ExportExcelSheet
{
    /// <summary>
    /// Sheet name.
    /// </summary>
    public string SheetName { get; set; }
    /// <summary>
    /// Export model configuration.
    /// </summary>
    public ExportModel Model { get; set; }
    /// <summary>
    /// Data type (the samples set it to the element type of <see cref="Data"/>).
    /// </summary>
    public Type DataType { get; set; }
    /// <summary>
    /// Data source (non-generic list).
    /// </summary>
    public IList Data { get; set; }
}
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
# 🔧 实现提供器
# Pdf.iText(iTextSharp 实现)
特点:
- ✅ 功能强大,支持复杂的 PDF 操作
- ✅ 支持中文(需配置字体)
- ✅ 支持表单、加密、数字签名
- ⚠️ AGPL 开源许可(闭源商业用途需购买商业许可)
主要类:
- PdfHandler - PDF 读取处理
- ITextSharpExcelExportHandler - Excel 导出为 PDF
- PdfReportHandler - PDF 报表生成
# Pdf.PDFSharp(PDFSharp 实现)
特点:
- ✅ 轻量级
- ✅ 完全免费(MIT 许可)
- ✅ 支持基本的 PDF 创建和编辑
- ⚠️ 功能相对简单
主要类:
- DocHelper - 文档帮助类
- IFileConvertHelper - 文件转换帮助类
# Pdf.iText.Excel(Excel 导出扩展)
特点:
- ✅ 基于 NPOI 读取 Excel
- ✅ 支持多 Sheet 导出
- ✅ 支持自定义样式
- ✅ 支持数据源适配器(DataReader/DataTable/List)
主要类:
- PdfReportHandler - PDF 报表处理器
- ITextSharpExcelHandler - Excel 处理
- NPOIExtend - NPOI 扩展方法
# 💡 最佳实践
# 1. 中文支持配置
// Configure a Chinese font when using iTextSharp
/// <summary>
/// Excerpt of the iTextSharp-based handler showing only the font setup; the
/// <see cref="IPdfHandler"/> members are omitted from this sample.
/// </summary>
public class PdfHandler : IPdfHandler
{
    // Base font used for text that contains Chinese characters
    private readonly BaseFont _chineseFont;
    /// <summary>
    /// Creates the handler and loads the Chinese base font once.
    /// </summary>
    public PdfHandler()
    {
        // Load the Chinese font ("STSong-Light" with the "UniGB-UCS2-H" encoding, not embedded).
        // NOTE(review): presumably requires iText's Asian font resources to be available - confirm.
        _chineseFont = BaseFont.CreateFont(
            "STSong-Light",
            "UniGB-UCS2-H",
            BaseFont.NOT_EMBEDDED);
    }
    // Use _chineseFont when creating PDFs that contain Chinese text
}
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
# 2. 大数据导出优化
// Export a large data set split into fixed-size pages
/// <summary>
/// Splits <paramref name="allData"/> into pages of 1000 rows, turns every page into
/// its own sheet and passes all sheets to the PDF export handler.
/// </summary>
/// <param name="allData">Complete data set to export.</param>
public async Task ExportLargeData(IList<LargeDataModel> allData)
{
    const int pageSize = 1000;
    var pageCount = (int)Math.Ceiling(allData.Count / (double)pageSize);

    // One sheet per page; page numbers shown to the user are 1-based.
    var sheets = Enumerable.Range(0, pageCount)
        .Select(pageIndex => new ExportExcelSheet
        {
            SheetName = $"数据_{pageIndex + 1}",
            Model = new ExportModel
            {
                Title = $"大数据导出 - 第{pageIndex + 1}页"
            },
            DataType = typeof(LargeDataModel),
            Data = allData.Skip(pageIndex * pageSize).Take(pageSize).ToList()
        })
        .ToList();

    var stream = new MemoryStream();
    _pdfExportHandler.CreatePdf(sheets, stream);
    // ...
}
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
# 3. 临时文件清理
/// <summary>
/// Hands out temporary PDF file paths and deletes the corresponding files on dispose.
/// </summary>
public class PdfService : IDisposable
{
    private readonly List<string> _tempFiles = new List<string>();
    private readonly string _tempPath;

    /// <summary>
    /// Creates the service.
    /// </summary>
    /// <param name="config">
    /// PDF configuration. It is injected directly because AddPdf registers
    /// <see cref="PdfConfig"/> itself as a singleton; resolving IOptions&lt;PdfConfig&gt;
    /// would yield an unbound instance whose TempPath is null.
    /// </param>
    /// <exception cref="System.ArgumentNullException"><paramref name="config"/> is null.</exception>
    /// <exception cref="System.ArgumentException">TempPath is not configured.</exception>
    public PdfService(PdfConfig config)
    {
        if (config == null)
        {
            throw new System.ArgumentNullException(nameof(config));
        }

        if (string.IsNullOrWhiteSpace(config.TempPath))
        {
            throw new System.ArgumentException("PdfConfig.TempPath must be configured.", nameof(config));
        }

        _tempPath = config.TempPath;
    }

    /// <summary>
    /// Reserves a unique temporary PDF path under the configured temp root and
    /// remembers it for cleanup. The file itself is not created here.
    /// </summary>
    /// <returns>Full path of the reserved temporary file.</returns>
    public string CreateTempPdf()
    {
        var tempFile = Path.Combine(_tempPath, $"{Guid.NewGuid()}.pdf");
        _tempFiles.Add(tempFile);
        return tempFile;
    }

    /// <summary>
    /// Deletes every temporary file handed out by this instance. Cleanup is best
    /// effort: a file that cannot be deleted is reported and skipped, because
    /// Dispose must not throw. Calling Dispose again is a no-op.
    /// </summary>
    public void Dispose()
    {
        // Clean up the temporary files
        foreach (var file in _tempFiles)
        {
            try
            {
                if (File.Exists(file))
                {
                    File.Delete(file);
                }
            }
            catch (Exception ex) when (ex is IOException || ex is UnauthorizedAccessException)
            {
                // Report instead of silently swallowing; swap in the application
                // logger where one is available.
                System.Diagnostics.Trace.TraceWarning(
                    "Failed to delete temporary PDF '{0}': {1}", file, ex.Message);
            }
        }

        // Forget the paths so a second Dispose does not retry them.
        _tempFiles.Clear();
    }
}
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
# 4. 导出性能优化
// Asynchronous export
/// <summary>
/// Builds the PDF report on a thread-pool task and returns it as a file download.
/// </summary>
/// <returns>The generated PDF ("report.pdf") as a file result.</returns>
public async Task<IActionResult> ExportAsync()
{
    var pdfResult = await Task.Run(() =>
    {
        var rows = _service.GetData();
        var buffer = new MemoryStream();
        _pdfExportHandler.CreatePdf(_model, rows, buffer);
        // Rewind so the response is read from the start of the PDF
        buffer.Position = 0;
        return File(buffer, "application/pdf", "report.pdf");
    });
    return pdfResult;
}
// Or process large exports in a background task
/// <summary>
/// Starts the export as a background job and returns immediately with the job id;
/// the requester is notified once the PDF file has been written.
/// </summary>
/// <returns>An OK result carrying the job id and a confirmation message.</returns>
public Task<IActionResult> ExportInBackground()
{
    var jobId = Guid.NewGuid().ToString();

    // Start the background job (fire-and-forget; failures are handled inside the job).
    // NOTE(review): the job uses fields of this instance after the request has ended;
    // confirm those services are safe to use outside the request scope.
    _ = Task.Run(() => RunExportJobAsync(jobId));

    // Nothing is awaited here, so the method is not async.
    IActionResult started = Ok(new { JobId = jobId, Message = "导出任务已启动,完成后将通知您" });
    return Task.FromResult(started);
}

/// <summary>
/// Writes the export for <paramref name="jobId"/> to a file and then sends the
/// download-ready notification.
/// </summary>
private async Task RunExportJobAsync(string jobId)
{
    try
    {
        var data = await _service.GetDataAsync();
        var filePath = Path.Combine(_tempPath, $"{jobId}.pdf");

        // Close the file BEFORE notifying: with a using declaration the stream would
        // stay open until the end of the method, so the recipient could be told the
        // download is ready while the file is still open and not fully flushed.
        using (var fileStream = new FileStream(filePath, FileMode.Create))
        {
            _pdfExportHandler.CreatePdf(_model, data, fileStream);
        }

        // Send the notification
        await _notificationService.SendDownloadReady(jobId, filePath);
    }
    catch (Exception ex)
    {
        // Nobody awaits this task, so an escaping exception would go unnoticed.
        // Report it here; swap in the application logger where one is available.
        System.Diagnostics.Trace.TraceError("PDF export job {0} failed: {1}", jobId, ex);
    }
}
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
# 🔍 常见问题
# Q1: 中文显示乱码?
解决方案:
// iTextSharp needs a Chinese font to be configured
BaseFont baseFont = BaseFont.CreateFont(
    "STSong-Light", // Chinese font
    "UniGB-UCS2-H", // Encoding
    BaseFont.NOT_EMBEDDED);
// Build a 12-point font from the base font
var font = new iTextSharp.text.Font(baseFont, 12);
1
2
3
4
5
6
7
2
3
4
5
6
7
# Q2: 导出大文件内存溢出?
解决方案:
- ✅ 使用分页导出
- ✅ 使用流式写入(不一次性加载所有数据)
- ✅ 增加服务器内存限制
- ✅ 使用后台任务处理
# Q3: 如何选择 PDF 提供器?
| 提供器 | 适用场景 | 许可 |
|---|---|---|
| iText | 企业级应用、复杂 PDF 需求 | AGPL(商业需购买) |
| PDFSharp | 简单 PDF 创建、预算有限 | MIT(免费) |
# 📚 相关文档
# 🔗 参考链接
- 源代码: src/Framework/Pdf
- iTextSharp 文档
- PDFSharp 文档
- NPOI 文档
最后更新: 2024-09-20