Skip to content

Commit

Permalink
Merge pull request #3 from psfblair/master
Browse files Browse the repository at this point in the history
Ability to parse emails from meetup.com; storage of entire original message in database
  • Loading branch information
psfblair committed Jun 25, 2014
2 parents 0fdd15a + c91d0cb commit 515c5e4
Show file tree
Hide file tree
Showing 24 changed files with 1,260 additions and 31 deletions.
4 changes: 4 additions & 0 deletions App/App.fsproj
Original file line number Diff line number Diff line change
Expand Up @@ -75,6 +75,10 @@
<Project>{166E52EB-4E26-4074-A695-63D071AC135F}</Project>
<Name>Persistence</Name>
</ProjectReference>
<ProjectReference Include="..\MeetupParser\MeetupParser.fsproj">
<Project>{561CEF0A-3584-44FE-B5F8-0F574E1019B0}</Project>
<Name>MeetupParser</Name>
</ProjectReference>
</ItemGroup>
<ItemGroup>
<None Include="packages.config" />
Expand Down
8 changes: 5 additions & 3 deletions App/Program.fs
Original file line number Diff line number Diff line change
Expand Up @@ -19,21 +19,23 @@ let usage() =
printfn ""
printfn " .denbow - extension for Frank Denbow's mails"
printfn " .odonnell - extension for Charlie O'Donnell's mails"
printfn " .meetup - extension for mails from meetup.com"
printfn ""
printfn "Output is written into a file named Events.html in the "
printfn "current working directory."

let selectParseFunction (fileName: string) =
match fileName with
| filename when filename.EndsWith(".denbow") -> DenbowParser.Parser.parseMail
| filename when filename.EndsWith(".odonnell") -> ODonnellParser.Parser.parseMail
| filename when filename.EndsWith(".denbow") -> DenbowParser.Parser.parseMail
| filename when filename.EndsWith(".odonnell") -> ODonnellParser.Parser.parseMail
| filename when filename.EndsWith(".meetup") -> MeetupParser.Parser.parseMail
| _ -> sprintf "Unrecognized file extension for file: %s" fileName |> failwith

let loadDataFrom (filename: string) =
let inputString = System.IO.File.ReadAllText(filename)
let message = loadMimeMessageFrom(inputString)
let parseFunction = selectParseFunction filename
let parsed = parseFunction message
let parsed = parseFunction message inputString
loadMail parsed
()

Expand Down
10 changes: 5 additions & 5 deletions DenbowParser/Parser.fs
Original file line number Diff line number Diff line change
Expand Up @@ -131,14 +131,14 @@ let rec calendarEntriesFrom (messageParts: list<MessagePart>) : list<CalendarEnt
| RsvpLinkPart(_) :: items -> calendarEntry :: (calendarEntriesFrom remainingParts)
| _ -> sprintf "Nonempty list not starting with RSVP link part: [%s]" (remainingParts.ToString()) |> failwith

let parseIntoEmailData (sender: string) (sentDate: System.DateTime) (messageParts: list<MessagePart>) : EmailData =
let parseIntoEmailData (sender: string) (sentDate: System.DateTime) (originalMessageString: string) (messageParts: list<MessagePart>) : EmailData =
let intro = messageParts |> extractWithEmptyStringDefault (function | IntroPart(intro) -> Some(String.concat "\n" intro) | _ -> None)
let nonIntroParts = messageParts |> List.filter (function |IntroPart(_) -> false | _ -> true)
let calendarEntries = calendarEntriesFrom nonIntroParts

{ MailDate = sentDate; MailSender = sender; MailIntro = intro; CalendarEntries = calendarEntries }
{ MailDate = sentDate; MailSender = sender; MailIntro = intro; OriginalMessage = originalMessageString; CalendarEntries = calendarEntries }

let parseMail (message: MimeMessage) : EmailData =
let messageData = messageDataFor message
let parseMail (message: MimeMessage) (originalMessageString: string) : EmailData =
let messageData = messageDataFor message originalMessageString
let messageParts = parse messageData.MessageLines PreIntro
parseIntoEmailData messageData.Sender messageData.SentDate messageParts
parseIntoEmailData messageData.Sender messageData.SentDate originalMessageString messageParts
5 changes: 3 additions & 2 deletions DenbowParser/Script.fsx
Original file line number Diff line number Diff line change
Expand Up @@ -14,9 +14,10 @@ open System.Text.RegularExpressions
open DenbowParser.Parser
open DenbowParser.Utils

let message = System.IO.File.ReadAllText("Email.denbow") |> loadMimeMessageFrom
let inputString = System.IO.File.ReadAllText("Email.denbow")
let message = inputString |> loadMimeMessageFrom

let messageData = messageDataFor message
let messageData = messageDataFor message inputString
let messageLines = messageData.MessageLines

let messageParts = parse messageLines PreIntro
Expand Down
9 changes: 5 additions & 4 deletions DenbowParser/Utils.fs
Original file line number Diff line number Diff line change
Expand Up @@ -8,12 +8,13 @@ open EmailParser.Utils.Text
open EmailParser.Utils.Date
open EmailParser.Types

let messageDataFor (message: MimeMessage) =
let messageDataFor (message: MimeMessage) (originalMessageString: string) =
let messageLines = htmlPartsOf message |> String.concat "\n" |> toPlainText |> splitIntoLines
{
Sender = senderOf message;
SentDate = dateOf message;
MessageLines = messageLines
MessageLines = messageLines;
EntireMessage = originalMessageString
}

let startsWithEventDate (line: string) =
Expand Down Expand Up @@ -48,7 +49,7 @@ let extractTitleFrom (eventHeader: string) =
eventHeader.Substring(startIndex).Trim()

let dateAndTimeFrom (dateTimeString: string) =
let normalized = dateTimeString |> regexReplace " +" " "
let normalized = dateTimeString |> normalizeSpace
|> regexReplace @"\s+:" ":" //No space around colons in time
|> regexReplace @":\s+" ":"
|> regexReplaceIgnoreCase @"\s+am\s+" "am " //No space before am or pm
Expand All @@ -67,7 +68,7 @@ let dateAndTimeFrom (dateTimeString: string) =


let containsCalendarLink (descriptionLine: string) =
let normalizedLine = descriptionLine.ToLower() |> regexReplace " +" " "
let normalizedLine = descriptionLine.ToLower() |> normalizeSpace
normalizedLine.Contains("view in calendar")

let removeCalendarLink (descriptionLine: string) =
Expand Down
6 changes: 6 additions & 0 deletions EmailParser.sln
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,8 @@ Project("{f2a71f9b-5d33-465a-a702-920d77279786}") = "DenbowParser", "DenbowParse
EndProject
Project("{f2a71f9b-5d33-465a-a702-920d77279786}") = "Utils", "Utils\Utils.fsproj", "{EFAA7888-F46F-4E5B-9020-B03B90422E6F}"
EndProject
Project("{f2a71f9b-5d33-465a-a702-920d77279786}") = "MeetupParser", "MeetupParser\MeetupParser.fsproj", "{561CEF0A-3584-44FE-B5F8-0F574E1019B0}"
EndProject
Global
GlobalSection(SolutionConfigurationPlatforms) = preSolution
Debug|Any CPU = Debug|Any CPU
Expand All @@ -33,6 +35,10 @@ Global
{427B4B20-D0A6-4AA9-933A-F7D23F76DF4E}.Debug|Any CPU.Build.0 = Debug|Any CPU
{427B4B20-D0A6-4AA9-933A-F7D23F76DF4E}.Release|Any CPU.ActiveCfg = Release|Any CPU
{427B4B20-D0A6-4AA9-933A-F7D23F76DF4E}.Release|Any CPU.Build.0 = Release|Any CPU
{561CEF0A-3584-44FE-B5F8-0F574E1019B0}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
{561CEF0A-3584-44FE-B5F8-0F574E1019B0}.Debug|Any CPU.Build.0 = Debug|Any CPU
{561CEF0A-3584-44FE-B5F8-0F574E1019B0}.Release|Any CPU.ActiveCfg = Release|Any CPU
{561CEF0A-3584-44FE-B5F8-0F574E1019B0}.Release|Any CPU.Build.0 = Release|Any CPU
{59D4D8A6-CCF5-449E-A5FC-00078D897A3A}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
{59D4D8A6-CCF5-449E-A5FC-00078D897A3A}.Debug|Any CPU.Build.0 = Debug|Any CPU
{59D4D8A6-CCF5-449E-A5FC-00078D897A3A}.Release|Any CPU.ActiveCfg = Release|Any CPU
Expand Down
6 changes: 5 additions & 1 deletion EmailParser.userprefs
Original file line number Diff line number Diff line change
@@ -1,6 +1,10 @@
<Properties>
<MonoDevelop.Ide.Workspace ActiveConfiguration="Debug" />
<MonoDevelop.Ide.Workbench />
<MonoDevelop.Ide.Workbench ActiveDocument="MeetupParser/Script.fsx">
<Files>
<File FileName="MeetupParser/Script.fsx" Line="5" Column="1" />
</Files>
</MonoDevelop.Ide.Workbench>
<MonoDevelop.Ide.DebuggingService.Breakpoints>
<BreakpointStore>
<Breakpoint file="/Users/paulblair/Documents/workspace-Monodevelop/EmailParser/DbLoaderDebugger/Program.fs" line="5" column="1" />
Expand Down
Loading

0 comments on commit 515c5e4

Please sign in to comment.