-
Notifications
You must be signed in to change notification settings - Fork 2
/
Program.cs
137 lines (121 loc) · 3.68 KB
/
Program.cs
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using System.Xml;
using System.Text.RegularExpressions;
namespace WordPressExportConverter
{
class Program
{
/// <summary>
/// Entry point: runs the WordPress export conversion.
/// </summary>
/// <param name="args">
/// Optional command-line arguments. args[0] is the path of the export file to read
/// and args[1] is the path of the file to write. Any argument that is not supplied
/// falls back to the built-in default path.
/// </param>
static void Main(string[] args)
{
    // Defaults keep the original behaviour when the program is started without arguments.
    string inpath = @"C:\Users\Dave\Desktop\wordpress.input.xml";
    string outpath = @"C:\Users\Dave\Desktop\wordpress.output.xml";

    if (args != null && args.Length > 0)
    {
        inpath = args[0];
    }
    if (args != null && args.Length > 1)
    {
        outpath = args[1];
    }

    ConvertWordpressExport(inpath, outpath);
}
/// <summary>
/// Loads the XML document at <paramref name="inpath"/>, runs the text of every
/// /rss/channel/item/content:encoded element through ProcessBlogPost, stores the
/// result back into the element as a CDATA section and saves the document to
/// <paramref name="outpath"/>. Afterwards prints "Done" and waits for a line of
/// console input before returning.
/// </summary>
/// <param name="inpath">Path of the XML file to read.</param>
/// <param name="outpath">Path of the XML file to write.</param>
private static void ConvertWordpressExport(string inpath, string outpath)
{
    var export = new XmlDocument();
    export.Load(inpath);

    // The XPath below uses the "content" prefix, so it has to be bound to its namespace URI.
    var namespaces = new XmlNamespaceManager(export.NameTable);
    namespaces.AddNamespace("content", "http://purl.org/rss/1.0/modules/content/");

    foreach (XmlNode encoded in export.SelectNodes("/rss/channel/item/content:encoded", namespaces))
    {
        string converted = ProcessBlogPost(encoded.InnerText);

        // Blank out the current content, then append the converted text as a CDATA node.
        encoded.InnerText = null;
        XmlCDataSection replacement = export.CreateCDataSection(converted);
        encoded.AppendChild(replacement);
    }

    export.Save(outpath);

    Console.WriteLine("Done");
    Console.ReadLine();
}
// Locates the start of an img tag (case-insensitive).
private static readonly Regex findImgTag = new Regex("<img", RegexOptions.Compiled | RegexOptions.IgnoreCase);
// Locates the self-closing terminator that ends an img tag.
private static readonly Regex findEndImg = new Regex("/>", RegexOptions.Compiled | RegexOptions.IgnoreCase);

/// <summary>
/// Copies <paramref name="blogPost"/> to a new string, replacing every
/// "&lt;img ... /&gt;" span with the result of running it through ImgTagProcessor.
/// Text outside img tags is copied through unchanged.
/// </summary>
/// <param name="blogPost">The post markup to convert.</param>
/// <returns>
/// The converted markup. A null or empty input yields an empty string. An img tag
/// that has no "/&gt;" terminator is copied through unchanged together with
/// everything that follows it.
/// </returns>
private static string ProcessBlogPost(string blogPost)
{
    if (String.IsNullOrEmpty(blogPost))
    {
        return String.Empty;
    }

    StringBuilder output = new StringBuilder();
    int pos = 0;
    while (pos < blogPost.Length)
    {
        Match startImg = findImgTag.Match(blogPost, pos);
        if (!startImg.Success)
        {
            break;
        }

        Match endImg = findEndImg.Match(blogPost, startImg.Index);
        if (!endImg.Success)
        {
            // A failed match reports Index 0 and Length 0; using those values would
            // move pos backwards (negative Substring length, or an endless loop when
            // the tag starts at index 0). Leave the unterminated remainder as it is.
            break;
        }

        // Text between the previous tag and this one is copied verbatim.
        output.Append(blogPost.Substring(pos, startImg.Index - pos));

        int tagEnd = endImg.Index + endImg.Length;
        string imgTag = blogPost.Substring(startImg.Index, tagEnd - startImg.Index);
        ImgTagProcessor p = new ImgTagProcessor(imgTag);
        output.Append(p.Process());
        pos = tagEnd;
    }

    // Whatever follows the last processed tag (or the whole post if there was none).
    output.Append(blogPost.Substring(pos));
    return output.ToString();
}
/// <summary>
/// Rewrites the src attribute of a single img tag so that its query string consists
/// of the image dimensions ("w=" and "h=" parameters). The dimensions are taken from
/// the tag's width/height attributes and from w/h parameters already present in the
/// src query string; when both are present, the one that appears later in the tag wins.
/// </summary>
class ImgTagProcessor
{
    // Matches double-quoted name="value" attribute pairs.
    static readonly Regex findAtts = new Regex(@"(?<Att>\w+)=""(?<Value>[^""]*)""", RegexOptions.Compiled | RegexOptions.IgnoreCase);

    // "w" and "h" must be complete parameter names: they have to start the query or
    // follow '?', '&' or ';' (the latter covers entity-encoded separators such as
    // "&amp;"). Without the anchor "show=7" would be read as w=7 and "width=300" as h=300.
    static readonly Regex queryW = new Regex(@"(?:^|[?&;])w=(\d+)", RegexOptions.Compiled | RegexOptions.IgnoreCase);
    static readonly Regex queryH = new Regex(@"(?:^|[?&;])h=(\d+)", RegexOptions.Compiled | RegexOptions.IgnoreCase);

    readonly string imgTag;
    string width;
    string height;

    /// <summary>Creates a processor for one img tag.</summary>
    /// <param name="imgTag">The complete text of the tag.</param>
    internal ImgTagProcessor(string imgTag)
    {
        this.imgTag = imgTag;
    }

    /// <summary>
    /// Collects the width and height information found in the tag and returns the tag
    /// with its src attribute rewritten accordingly.
    /// </summary>
    /// <returns>
    /// The tag text with the src query string replaced by the collected "w="/"h="
    /// parameters (any other query parameters are dropped, and the query is removed
    /// when no dimension is known). A src that is not an absolute http/https URL is
    /// left exactly as it was; all other attributes are always left untouched.
    /// </returns>
    internal string Process()
    {
        // Extract width and height info
        foreach (Match m in findAtts.Matches(imgTag))
        {
            string name = m.Groups["Att"].Value;
            string value = m.Groups["Value"].Value;

            if (IsAttribute(name, "width"))
            {
                this.width = value;
            }
            else if (IsAttribute(name, "height"))
            {
                this.height = value;
            }
            else if (IsAttribute(name, "src"))
            {
                Uri source;
                if (!TryParseWebUri(value, out source))
                {
                    // Relative or otherwise unparsable source: nothing to read from it.
                    continue;
                }

                string query = source.Query;
                if (!String.IsNullOrEmpty(query))
                {
                    Match matchW = queryW.Match(query);
                    Match matchH = queryH.Match(query);
                    if (matchW.Success)
                    {
                        width = matchW.Groups[1].Value;
                    }
                    if (matchH.Success)
                    {
                        height = matchH.Groups[1].Value;
                    }
                }
            }
        }

        return findAtts.Replace(imgTag, new MatchEvaluator(EvaluateAttributeMatch));
    }

    // Produces the replacement text for one attribute: a rewritten src, or the
    // attribute unchanged for everything else.
    string EvaluateAttributeMatch(Match m)
    {
        string name = m.Groups["Att"].Value;
        Uri source;
        if (!IsAttribute(name, "src") || !TryParseWebUri(m.Groups["Value"].Value, out source))
        {
            return m.Value;
        }

        UriBuilder builder = new UriBuilder(source);
        List<string> queryItems = new List<string>();
        if (width != null)
        {
            queryItems.Add("w=" + width);
        }
        if (height != null)
        {
            queryItems.Add("h=" + height);
        }
        builder.Query = String.Join("&", queryItems.ToArray());

        // UriBuilder.ToString() can spell out the scheme's default port (":80");
        // going through the Uri yields the normalized form without it.
        return name + "=\"" + builder.Uri.AbsoluteUri + "\"";
    }

    // HTML attribute names are case-insensitive.
    static bool IsAttribute(string name, string expected)
    {
        return String.Equals(name, expected, StringComparison.OrdinalIgnoreCase);
    }

    // Accepts only absolute http/https URLs. Relative paths, data: URIs and values
    // that the platform would interpret as file paths are rejected so that they
    // are never rewritten (and never cause a UriFormatException).
    static bool TryParseWebUri(string value, out Uri uri)
    {
        if (Uri.TryCreate(value, UriKind.Absolute, out uri)
            && (uri.Scheme == Uri.UriSchemeHttp || uri.Scheme == Uri.UriSchemeHttps))
        {
            return true;
        }

        uri = null;
        return false;
    }
}
}
}