因个人需求,需要将html格式转换成PDF并加上水印图片。于是乎第一次接触这种需求的小菜鸟博主我,在某度搜索引擎上不断地查阅关键字资料、踩坑,终于有了一个相应的解决方案。以下是解决步骤,记录下来方便以后的回顾,以及各位大神们的品鉴。
1、在 NuGet 中搜索 itextsharp 关键字,下载 iTextSharp 和 itextsharp.xmlworker 这两个包(即原文截图中圈出的两个),一般下载完后项目会自动添加引用。
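2、在项目文件中引入以下命名空间:System、System.IO、System.Net、System.Text、System.Web、System.Web.Mvc、iTextSharp.text、iTextSharp.text.pdf、iTextSharp.tool.xml(建议将下面提及的代码封装成类库,方便项目间调用,个人取舍)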
3、Html字符串转pdf文件流,加水印图片以及未加水印重载 精简帮助类(由博主踩坑整理,仅完成个人业务需求)
/// <summary>
/// Helper that renders an HTML string into an in-memory PDF, optionally
/// placing a watermark image on the page before the HTML content.
/// </summary>
public class HtmlToPdfHelper
{
    #region HtmlToPDF

    /// <summary>
    /// Reports whether <paramref name="txt"/> contains garbled characters, i.e. its
    /// UTF-8 encoding contains the byte sequence EF BF BD (239, 191, 189), which is
    /// the encoding of the Unicode replacement character U+FFFD.
    /// </summary>
    /// <param name="txt">Text to inspect; null or empty is treated as not garbled.</param>
    /// <returns><c>true</c> when the marker sequence is found; otherwise <c>false</c>.</returns>
    private static bool IsMessyCode(string txt)
    {
        if (string.IsNullOrEmpty(txt))
        {
            return false;
        }

        var bytes = Encoding.UTF8.GetBytes(txt);

        // "i + 2 < Length" lets the scan reach a marker that occupies the final three
        // bytes; the previous bound (i < Length - 3) stopped one position too early.
        for (var i = 0; i + 2 < bytes.Length; i++)
        {
            if (bytes[i] == 239 && bytes[i + 1] == 191 && bytes[i + 2] == 189)
            {
                return true;
            }
        }

        return false;
    }

    /// <summary>
    /// Renders an HTML string to a PDF without a watermark.
    /// </summary>
    /// <param name="htmlText">HTML markup (or plain text) to render.</param>
    /// <returns>
    /// The PDF bytes, or <c>null</c> when <paramref name="htmlText"/> is null/empty
    /// or the HTML could not be parsed.
    /// </returns>
    public static byte[] ConvertHtmlTextToPdf(string htmlText)
    {
        return ConvertHtmlTextToPdf(htmlText, "", 0, 0, 0, 0);
    }

    /// <summary>
    /// Renders an HTML string to a PDF and, when <paramref name="picPath"/> is given,
    /// adds a watermark image to the document before the HTML content.
    /// </summary>
    /// <param name="htmlText">HTML markup (or plain text) to render.</param>
    /// <param name="picPath">Path of the watermark image; null or empty means no watermark.</param>
    /// <param name="left">Distance of the watermark from the left edge.</param>
    /// <param name="top">Distance of the watermark from the top edge.</param>
    /// <param name="width">Watermark width.</param>
    /// <param name="height">Watermark height.</param>
    /// <returns>
    /// The PDF bytes, or <c>null</c> when <paramref name="htmlText"/> is null/empty
    /// or parsing the HTML throws.
    /// </returns>
    public static byte[] ConvertHtmlTextToPdf(string htmlText, string picPath, int left, int top, int width, int height)
    {
        if (string.IsNullOrEmpty(htmlText))
        {
            return null;
        }

        // Always wrap the input in <p>: the original author found that converting
        // plain text without any HTML tag makes the conversion fail.
        htmlText = "<p>" + htmlText + "</p>";
        byte[] data = Encoding.UTF8.GetBytes(htmlText);

        // The using blocks release both streams on every exit path, including the
        // early "return null" when parsing fails.
        using (MemoryStream outputStream = new MemoryStream()) // stream the PDF is written to
        using (MemoryStream msInput = new MemoryStream(data))  // HTML source for the parser
        {
            // Per the original author's note: the parameterless constructor defaults to portrait A4.
            Document doc = new Document();
            PdfWriter writer = PdfWriter.GetInstance(doc, outputStream);

            // Destination used for the open action: top of the page at 100% zoom.
            PdfDestination pdfDest = new PdfDestination(PdfDestination.XYZ, 0, doc.PageSize.Height, 1f);

            doc.Open();

            // Add the watermark image, if one was requested.
            if (!string.IsNullOrEmpty(picPath))
            {
                Image img = Image.GetInstance(picPath);

                // NOTE(review): the X coordinate is (width + left) although 'left' is documented
                // as the distance from the left edge; kept as-is so existing output does not
                // shift. Confirm whether plain 'left' was intended.
                img.SetAbsolutePosition(width + left, (doc.PageSize.Height - height) - top);
                img.ScaleAbsolute(width, height);
                doc.Add(img);
            }

            try
            {
                // Parse the HTML into the PDF document.
                XMLWorkerHelper.GetInstance().ParseXHtml(writer, doc, msInput, null, Encoding.UTF8);

                // Register the open action that jumps to pdfDest on page 1.
                PdfAction action = PdfAction.GotoLocalPage(1, pdfDest, writer);
                writer.SetOpenAction(action);
            }
            catch (Exception)
            {
                // Existing contract: a parse failure is reported to the caller as null.
                return null;
            }

            doc.Close();

            // MemoryStream.ToArray still works after the stream has been closed.
            return outputStream.ToArray();
        }
    }

    #endregion

}
4、获取网页字符串的方法
/// <summary>
/// Downloads the resource at <paramref name="inpath"/> and decodes it to a string.
/// The bytes are decoded as UTF-8 first; if that result looks garbled (see
/// <c>IsMessyCode</c>), they are decoded again with <see cref="Encoding.Default"/>.
/// </summary>
/// <param name="inpath">Address of the resource passed to <see cref="WebClient.DownloadData(string)"/>.</param>
/// <returns>
/// The decoded page content. When the download raises a <see cref="WebException"/>,
/// the exception message is returned instead of page content.
/// </returns>
public static string GetWebContent(string inpath)
{
    try
    {
        byte[] pageData;

        // Dispose the client as soon as the download has finished.
        using (WebClient myWebClient = new WebClient())
        {
            // Credentials used to authenticate the request.
            myWebClient.Credentials = CredentialCache.DefaultCredentials;
            pageData = myWebClient.DownloadData(inpath);
        }

        // Decode once as UTF-8; only fall back to the system default encoding
        // when the UTF-8 result contains replacement characters.
        string pageHtml = Encoding.UTF8.GetString(pageData);
        if (IsMessyCode(pageHtml))
        {
            pageHtml = Encoding.Default.GetString(pageData);
        }

        return pageHtml;
    }
    catch (WebException webEx)
    {
        // Existing contract: the error text is returned in place of the page content.
        return webEx.Message;
    }
}
5、MVC 设计模式下获取控制器视图 Html 的方法。目前的局限是:只能获取调用此方法的控制器下的视图,不能跨控制器获取视图,有待优化。
/// <summary>
/// Renders an MVC view to an HTML string.
/// </summary>
/// <param name="context">Controller context used to locate and render the view.</param>
/// <param name="viewName">
/// Name of the view; when null or empty, the current route's "action" value is used.
/// </param>
/// <returns>The HTML produced by rendering the view.</returns>
/// <exception cref="ArgumentNullException"><paramref name="context"/> is null.</exception>
/// <exception cref="InvalidOperationException">No view engine could find the view.</exception>
public static string GetViewHtml(ControllerContext context, string viewName)
{
    if (context == null)
    {
        throw new ArgumentNullException("context");
    }

    if (string.IsNullOrEmpty(viewName))
    {
        viewName = context.RouteData.GetRequiredString("action");
    }

    ViewEngineResult viewResult = ViewEngines.Engines.FindPartialView(context, viewName);

    // Fail with a descriptive error instead of a NullReferenceException further down.
    if (viewResult.View == null)
    {
        throw new InvalidOperationException(
            "The view '" + viewName + "' was not found. Searched locations:" + Environment.NewLine +
            string.Join(Environment.NewLine, viewResult.SearchedLocations));
    }

    using (var sw = new StringWriter())
    {
        var viewContext = new ViewContext(context, viewResult.View, context.Controller.ViewData, context.Controller.TempData, sw);
        try
        {
            viewResult.View.Render(viewContext, sw);
        }
        finally
        {
            // Hand the view back to the engine that created it, even if rendering throws.
            viewResult.ViewEngine.ReleaseView(context, viewResult.View);
        }

        return sw.GetStringBuilder().ToString();
    }
}
6、将pdf流输出至客户浏览器下载方法
/// <summary>
/// Sends a PDF to the browser as a file download on the current HTTP response.
/// The download file name is the current local time formatted as yyyyMMddHHmmss plus ".pdf".
/// </summary>
/// <param name="pdfFile">The PDF content.</param>
/// <exception cref="ArgumentNullException"><paramref name="pdfFile"/> is null.</exception>
public static void PdfDownload(byte[] pdfFile)
{
    if (pdfFile == null)
    {
        throw new ArgumentNullException("pdfFile");
    }

    HttpResponse response = HttpContext.Current.Response;
    try
    {
        string filename = DateTime.Now.ToString("yyyyMMddHHmmss") + ".pdf"; // download file name

        response.Clear();
        response.ClearHeaders();
        response.ClearContent();
        response.ContentType = "application/pdf";
        // Content-Length lets the browser show download progress.
        response.AddHeader("Content-Length", pdfFile.Length.ToString());
        response.AddHeader("Content-Disposition", "attachment; filename=" + HttpUtility.UrlEncode(filename, Encoding.UTF8));

        // The whole PDF is already in memory, so it is written in a single call;
        // nothing is sent when the client has already disconnected.
        if (response.IsClientConnected)
        {
            response.OutputStream.Write(pdfFile, 0, pdfFile.Length);
            response.Flush();
        }
    }
    catch (Exception)
    {
        response.Write("文件下载时出现错误!");
    }
    finally
    {
        // End the response; otherwise page markup would be appended to the file
        // and the downloaded PDF could not be opened.
        response.Flush();
        response.End();
    }
}
7、MVC控制器下调用Demo(步骤4、6 方法封装至帮助类)
/// <summary>
/// Demo controller: converts an HTML template to a watermarked PDF and sends it
/// to the browser as a download.
/// </summary>
public class HomeController : Controller
{
    //
    // GET: /Home/

    /// <summary>
    /// Loads the HTML template, converts it to a watermarked PDF and streams it to the client.
    /// </summary>
    /// <returns>
    /// An HTTP 500 result when the PDF could not be generated; otherwise an empty
    /// result, because the download has already been written to the response.
    /// </returns>
    public ActionResult Index()
    {
        // Option 1: load the HTML string from a file/URL (step 4 helper).
        string inpath = Server.MapPath("~/PDFTemplate/test.html");
        string htmlText = HtmlToPdfHelper.GetWebContent(inpath);

        // Option 2: render an MVC view to an HTML string (step 5 helper).
        //string htmlText = GetViewHtml(ControllerContext, "Test");

        // Watermark image path.
        string picPath = Server.MapPath("~/PDFTemplate/TemplateImg/authentication-iocn.png");

        // Convert the HTML to PDF and add the watermark.
        byte[] pdfFile = HtmlToPdfHelper.ConvertHtmlTextToPdf(htmlText, picPath, 100, 200, 100, 100);

        // ConvertHtmlTextToPdf returns null for empty input or a parse failure;
        // do not pass that on to the download helper.
        if (pdfFile == null)
        {
            return new HttpStatusCodeResult(500, "PDF generation failed.");
        }

        // Send the PDF to the client (step 6 helper). It ends the response itself,
        // so no view must be rendered afterwards.
        HtmlToPdfHelper.PdfDownload(pdfFile);

        return new EmptyResult();
    }

    /// <summary>
    /// Returns the view used as the HTML source for option 2 in <see cref="Index"/>.
    /// </summary>
    public ActionResult Test()
    {
        return View();
    }

    /// <summary>
    /// Renders an MVC view to an HTML string.
    /// </summary>
    /// <param name="context">Controller context used to locate and render the view.</param>
    /// <param name="viewName">
    /// Name of the view; when null or empty, the current route's "action" value is used.
    /// </param>
    /// <returns>The HTML produced by rendering the view.</returns>
    /// <exception cref="ArgumentNullException"><paramref name="context"/> is null.</exception>
    /// <exception cref="InvalidOperationException">No view engine could find the view.</exception>
    public static string GetViewHtml(ControllerContext context, string viewName)
    {
        if (context == null)
        {
            throw new ArgumentNullException("context");
        }

        if (string.IsNullOrEmpty(viewName))
        {
            viewName = context.RouteData.GetRequiredString("action");
        }

        ViewEngineResult viewResult = ViewEngines.Engines.FindPartialView(context, viewName);

        // Fail with a descriptive error instead of a NullReferenceException further down.
        if (viewResult.View == null)
        {
            throw new InvalidOperationException(
                "The view '" + viewName + "' was not found. Searched locations:" + Environment.NewLine +
                string.Join(Environment.NewLine, viewResult.SearchedLocations));
        }

        using (var sw = new StringWriter())
        {
            var viewContext = new ViewContext(context, viewResult.View, context.Controller.ViewData,
                context.Controller.TempData, sw);
            try
            {
                viewResult.View.Render(viewContext, sw);
            }
            finally
            {
                // Hand the view back to the engine that created it, even if rendering throws.
                viewResult.ViewEngine.ReleaseView(context, viewResult.View);
            }

            return sw.GetStringBuilder().ToString();
        }
    }
}
总结:我理解的解决思路是将 html 读取并转换成字符串,之后再通过 itextsharp 转换成 pdf 字节流,传输至客户端,或直接保存至服务器生成链接供用户下载。(新手上路,不妥之处,欢迎各位大神指教)
以上代码仅满足个人业务逻辑需求,谢谢浏览。