Skip to content

Commit

Permalink
Parse emails from meetup.com
Browse files Browse the repository at this point in the history
  • Loading branch information
psfblair committed Jun 25, 2014
1 parent 4d43450 commit 79ac620
Show file tree
Hide file tree
Showing 14 changed files with 1,228 additions and 10 deletions.
4 changes: 4 additions & 0 deletions App/App.fsproj
Original file line number Diff line number Diff line change
Expand Up @@ -75,6 +75,10 @@
<Project>{166E52EB-4E26-4074-A695-63D071AC135F}</Project>
<Name>Persistence</Name>
</ProjectReference>
<ProjectReference Include="..\MeetupParser\MeetupParser.fsproj">
<Project>{561CEF0A-3584-44FE-B5F8-0F574E1019B0}</Project>
<Name>MeetupParser</Name>
</ProjectReference>
</ItemGroup>
<ItemGroup>
<None Include="packages.config" />
Expand Down
6 changes: 4 additions & 2 deletions App/Program.fs
Original file line number Diff line number Diff line change
Expand Up @@ -19,14 +19,16 @@ let usage() =
printfn ""
printfn " .denbow - extension for Frank Denbow's mails"
printfn " .odonnell - extension for Charlie O'Donnell's mails"
printfn " .meetup - extension for mails from meetup.com"
printfn ""
printfn "Output is written into a file named Events.html in the "
printfn "current working directory."

let selectParseFunction (fileName: string) =
match fileName with
| filename when filename.EndsWith(".denbow") -> DenbowParser.Parser.parseMail
| filename when filename.EndsWith(".odonnell") -> ODonnellParser.Parser.parseMail
| filename when filename.EndsWith(".denbow") -> DenbowParser.Parser.parseMail
| filename when filename.EndsWith(".odonnell") -> ODonnellParser.Parser.parseMail
| filename when filename.EndsWith(".meetup") -> MeetupParser.Parser.parseMail
| _ -> sprintf "Unrecognized file extension for file: %s" fileName |> failwith

let loadDataFrom (filename: string) =
Expand Down
4 changes: 2 additions & 2 deletions DenbowParser/Utils.fs
Original file line number Diff line number Diff line change
Expand Up @@ -48,7 +48,7 @@ let extractTitleFrom (eventHeader: string) =
eventHeader.Substring(startIndex).Trim()

let dateAndTimeFrom (dateTimeString: string) =
let normalized = dateTimeString |> regexReplace " +" " "
let normalized = dateTimeString |> normalizeSpace
|> regexReplace @"\s+:" ":" //No space around colons in time
|> regexReplace @":\s+" ":"
|> regexReplaceIgnoreCase @"\s+am\s+" "am " //No space before am or pm
Expand All @@ -67,7 +67,7 @@ let dateAndTimeFrom (dateTimeString: string) =


let containsCalendarLink (descriptionLine: string) =
let normalizedLine = descriptionLine.ToLower() |> regexReplace " +" " "
let normalizedLine = descriptionLine.ToLower() |> normalizeSpace
normalizedLine.Contains("view in calendar")

let removeCalendarLink (descriptionLine: string) =
Expand Down
6 changes: 6 additions & 0 deletions EmailParser.sln
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,8 @@ Project("{f2a71f9b-5d33-465a-a702-920d77279786}") = "DenbowParser", "DenbowParse
EndProject
Project("{f2a71f9b-5d33-465a-a702-920d77279786}") = "Utils", "Utils\Utils.fsproj", "{EFAA7888-F46F-4E5B-9020-B03B90422E6F}"
EndProject
Project("{f2a71f9b-5d33-465a-a702-920d77279786}") = "MeetupParser", "MeetupParser\MeetupParser.fsproj", "{561CEF0A-3584-44FE-B5F8-0F574E1019B0}"
EndProject
Global
GlobalSection(SolutionConfigurationPlatforms) = preSolution
Debug|Any CPU = Debug|Any CPU
Expand All @@ -33,6 +35,10 @@ Global
{427B4B20-D0A6-4AA9-933A-F7D23F76DF4E}.Debug|Any CPU.Build.0 = Debug|Any CPU
{427B4B20-D0A6-4AA9-933A-F7D23F76DF4E}.Release|Any CPU.ActiveCfg = Release|Any CPU
{427B4B20-D0A6-4AA9-933A-F7D23F76DF4E}.Release|Any CPU.Build.0 = Release|Any CPU
{561CEF0A-3584-44FE-B5F8-0F574E1019B0}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
{561CEF0A-3584-44FE-B5F8-0F574E1019B0}.Debug|Any CPU.Build.0 = Debug|Any CPU
{561CEF0A-3584-44FE-B5F8-0F574E1019B0}.Release|Any CPU.ActiveCfg = Release|Any CPU
{561CEF0A-3584-44FE-B5F8-0F574E1019B0}.Release|Any CPU.Build.0 = Release|Any CPU
{59D4D8A6-CCF5-449E-A5FC-00078D897A3A}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
{59D4D8A6-CCF5-449E-A5FC-00078D897A3A}.Debug|Any CPU.Build.0 = Debug|Any CPU
{59D4D8A6-CCF5-449E-A5FC-00078D897A3A}.Release|Any CPU.ActiveCfg = Release|Any CPU
Expand Down
6 changes: 5 additions & 1 deletion EmailParser.userprefs
Original file line number Diff line number Diff line change
@@ -1,6 +1,10 @@
<Properties>
<MonoDevelop.Ide.Workspace ActiveConfiguration="Debug" />
<MonoDevelop.Ide.Workbench />
<MonoDevelop.Ide.Workbench ActiveDocument="MeetupParser/Script.fsx">
<Files>
<File FileName="MeetupParser/Script.fsx" Line="5" Column="1" />
</Files>
</MonoDevelop.Ide.Workbench>
<MonoDevelop.Ide.DebuggingService.Breakpoints>
<BreakpointStore>
<Breakpoint file="/Users/paulblair/Documents/workspace-Monodevelop/EmailParser/DbLoaderDebugger/Program.fs" line="5" column="1" />
Expand Down
407 changes: 407 additions & 0 deletions MeetupParser/Email.meetup

Large diffs are not rendered by default.

508 changes: 508 additions & 0 deletions MeetupParser/Email2.meetup

Large diffs are not rendered by default.

72 changes: 72 additions & 0 deletions MeetupParser/MeetupParser.fsproj
Original file line number Diff line number Diff line change
@@ -0,0 +1,72 @@
<?xml version="1.0" encoding="utf-8"?>
<Project DefaultTargets="Build" ToolsVersion="4.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
<PropertyGroup>
<Configuration Condition=" '$(Configuration)' == '' ">Debug</Configuration>
<Platform Condition=" '$(Platform)' == '' ">AnyCPU</Platform>
<ProjectGuid>{561CEF0A-3584-44FE-B5F8-0F574E1019B0}</ProjectGuid>
<OutputType>Library</OutputType>
<RootNamespace>MeetupParser</RootNamespace>
<AssemblyName>MeetupParser</AssemblyName>
<TargetFrameworkVersion>v4.5</TargetFrameworkVersion>
</PropertyGroup>
<PropertyGroup Condition=" '$(Configuration)|$(Platform)' == 'Debug|AnyCPU' ">
<DebugSymbols>true</DebugSymbols>
<DebugType>full</DebugType>
<Optimize>false</Optimize>
<OutputPath>bin\Debug</OutputPath>
<DefineConstants>DEBUG</DefineConstants>
<ErrorReport>prompt</ErrorReport>
<ConsolePause>false</ConsolePause>
<Tailcalls>false</Tailcalls>
<PlatformTarget>
</PlatformTarget>
</PropertyGroup>
<PropertyGroup Condition=" '$(Configuration)|$(Platform)' == 'Release|AnyCPU' ">
<DebugSymbols>true</DebugSymbols>
<DebugType>pdbonly</DebugType>
<Optimize>true</Optimize>
<OutputPath>bin\Release</OutputPath>
<ErrorReport>prompt</ErrorReport>
<PlatformTarget>
</PlatformTarget>
<ConsolePause>false</ConsolePause>
<Tailcalls>true</Tailcalls>
</PropertyGroup>
<ItemGroup>
<Reference Include="mscorlib" />
<Reference Include="FSharp.Core" />
<Reference Include="System" />
<Reference Include="System.Core" />
<Reference Include="System.Numerics" />
<Reference Include="MimeKitLite">
<HintPath>..\packages\MimeKitLite.0.36.0.0\lib\net40\MimeKitLite.dll</HintPath>
</Reference>
</ItemGroup>
<ItemGroup>
<Compile Include="Utils.fs" />
<Compile Include="Parser.fs" />
<Compile Include="Script.fsx" />
</ItemGroup>
<Import Project="$(MSBuildExtensionsPath32)\..\Microsoft SDKs\F#\3.1\Framework\v4.0\Microsoft.FSharp.Targets" />
<ItemGroup>
<None Include="packages.config" />
</ItemGroup>
<ItemGroup>
<ProjectReference Include="..\ParserTypes\ParserTypes.fsproj">
<Project>{D7E39B94-578A-43D2-9FCE-37D8898B6614}</Project>
<Name>ParserTypes</Name>
</ProjectReference>
<ProjectReference Include="..\Utils\Utils.fsproj">
<Project>{EFAA7888-F46F-4E5B-9020-B03B90422E6F}</Project>
<Name>Utils</Name>
</ProjectReference>
</ItemGroup>
<ItemGroup>
<Content Include="Email2.meetup">
<CopyToOutputDirectory>PreserveNewest</CopyToOutputDirectory>
</Content>
<Content Include="Email.meetup">
<CopyToOutputDirectory>PreserveNewest</CopyToOutputDirectory>
</Content>
</ItemGroup>
</Project>
93 changes: 93 additions & 0 deletions MeetupParser/Parser.fs
Original file line number Diff line number Diff line change
@@ -0,0 +1,93 @@
module MeetupParser.Parser

open MimeKit
open EmailParser.Types
open EmailParser.Utils.Collections
open EmailParser.Utils.Text
open EmailParser.Utils.Uri

open MeetupParser.Utils


type State =
| Header
| EventTitle
| EventDateTime
| EventLocation
| EventDescription
| RSVPLink
| MessageTrailer

type MessagePart =
| TitlePart of string
| DateTimePart of string
| LocationPart of list<string>
| DescriptionPart of list<string>
| RsvpLinkPart of string

let rec parse (mail: seq<string>) (state: State) : list<MessagePart> =
let mailText = List.ofSeq mail
match state with
| Header -> header mailText
| EventTitle -> eventTitle mailText
| EventDateTime -> eventDateTime mailText
| EventLocation -> eventLocation mailText
| EventDescription -> eventDescription mailText
| RSVPLink -> rsvpLink mailText
| MessageTrailer -> messageTrailer mailText

and header (mailText: list<string>) =
let headerLines, rest = mailText |> takeAndSkipUntil startsWithTitle
(parse rest EventTitle)

and eventTitle (mailText: list<string>) =
let title = mailText.Head |> extractTitleFrom |> TitlePart
let ignore, rest = mailText |> takeAndSkipUntil startsWithEventDate
title :: (parse rest EventDateTime)

and eventDateTime (mailText: list<string>) =
let date = mailText.Head |> extractDateTimeFrom |> DateTimePart
let ignore, rest = mailText |> takeAndSkipUntil startsWithLocation
date :: (parse rest EventLocation)

and eventLocation (mailText: list<string>) =
let locationLines, rest = mailText.Tail |> takeAndSkipUntil isBlank
let location = locationLines |> List.ofSeq |> LocationPart
location :: (parse rest EventDescription)

and eventDescription (mailText: list<string>) =
let descriptionLines, rest = mailText.Tail |> takeAndSkipUntil isRsvpLine
let description = descriptionLines |> List.ofSeq |> DescriptionPart
description :: (parse rest RSVPLink)

and rsvpLink (mailText: list<string>) =
let link = mailText.Head |> extractRsvpLinkFrom |> RsvpLinkPart
link :: (parse mailText.Tail MessageTrailer)

and messageTrailer (mailText: list<string>) = []

let calendarEntryFrom (messageParts: seq<MessagePart>) =

let title = messageParts |> extractWithEmptyStringDefault (function | TitlePart(aTitle) -> Some(aTitle) | _ -> None)
let date = messageParts |> extractWithEmptyStringDefault (function | DateTimePart(dateTime) -> Some(dateTime) | _ -> None)
let location = messageParts |> extractWithEmptyStringDefault (function | LocationPart(loc) -> Some(String.concat "\n" loc) | _ -> None)
let description = messageParts |> extractWithEmptyStringDefault (function | DescriptionPart(desc) -> Some(String.concat "\n" desc) | _ -> None)
let rsvp = messageParts |> extractWithEmptyStringDefault (function | RsvpLinkPart(link) -> Some(link) | _ -> None)

{
EventDate = (dateAndTimeFrom date);
EventTitle = title;
EventLocation = Some(location);
EventDescription = description.Trim();
RsvpLink = (uriFrom rsvp)
}

let parseIntoEmailData (sender: string) (sentDate: System.DateTime) (messageParts: list<MessagePart>) : EmailData =
let calendarEntries = [ calendarEntryFrom messageParts ]

{ MailDate = sentDate; MailSender = sender; MailIntro = ""; CalendarEntries = calendarEntries }

let parseMail (message: MimeMessage) : EmailData =
let messageData = messageDataFor message
let messageParts = parse messageData.MessageLines Header
parseIntoEmailData messageData.Sender messageData.SentDate messageParts
27 changes: 27 additions & 0 deletions MeetupParser/Script.fsx
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
#I "bin/Debug/"
#r "ParserTypes.dll"
#r "EmailParserUtils.dll"
#r "MimeKitLite.dll"

System.Environment.CurrentDirectory <- __SOURCE_DIRECTORY__

#load "Utils.fs"
#load "Parser.fs"

open EmailParser.Utils.Mime
open EmailParser.Utils.Text
open EmailParser.Utils.Collections

open MeetupParser.Utils
open MeetupParser.Parser

let message = System.IO.File.ReadAllText("Email.meetup") |> loadMimeMessageFrom

let messageData = messageDataFor message
let messageLines = messageData.MessageLines

let message2 = System.IO.File.ReadAllText("Email2.meetup") |> loadMimeMessageFrom
let message2Data = messageDataFor message2

let messageParts = parse messageLines Header
let parsed = parseMail message
61 changes: 61 additions & 0 deletions MeetupParser/Utils.fs
Original file line number Diff line number Diff line change
@@ -0,0 +1,61 @@
module MeetupParser.Utils

open System
open System.Text.RegularExpressions
open MimeKit
open EmailParser.Utils.Mime
open EmailParser.Utils.Date
open EmailParser.Utils.Text
open EmailParser.Types

let messageDataFor (message: MimeMessage) =
{
Sender = senderOf message;
SentDate = dateOf message;
MessageLines = (textOf message |> splitIntoLines)
}

let startsWithTitle (line: string) = line.Trim().StartsWith("What:")

let extractTitleFrom (titleLine: string) =
let startIndex = titleLine.IndexOf(": ") + 2
titleLine.Substring(startIndex).Trim()

let startsWithEventDate (line: string) = line.Trim().StartsWith("When:")

let extractDateTimeFrom (dateTimeLine: string) =
let startIndex = dateTimeLine.IndexOf(": ") + 2
dateTimeLine.Substring(startIndex).Trim()

let startsWithLocation (line: string) = line.Trim().StartsWith("Where:")

let isRsvpLine (line: string) = line.Trim() |> normalizeSpace |> (fun str -> str.StartsWith("Click here to say"))

let extractRsvpLinkFrom (rsvpLine: string) =
let startIndex = rsvpLine.IndexOf("http")
rsvpLine.Substring(startIndex).Trim()

let dateFrom = function
| dayOfWeek :: month :: day :: year :: rest when isDayOfWeek dayOfWeek && isMonth month && isYear year -> dateFromMonthDayYear month day year
| dayOfWeek :: month :: day :: rest when isDayOfWeek dayOfWeek && isMonth month -> dateFromMonthDay month day
| month :: day :: year :: rest when isMonth month && isYear year -> dateFromMonthDayYear month day year
| month :: day :: rest when isMonth month -> dateFromMonthDay month day
| other -> failwith(sprintf "unable to parse date: %A" other)

let dateAndTimeFrom (dateTimeString: string) =
let normalized = dateTimeString.Trim() |> normalizeSpace
|> regexReplace @"\s+:" ":" //No space around colons in time
|> regexReplace @":\s+" ":"
|> regexReplace @"\s+," "," //No space before commas
|> regexReplaceIgnoreCase @"\s+am\s*" "am" //No space before am or pm
|> regexReplaceIgnoreCase @"\s+pm\s*" "pm"

let parts = normalized.Split(' ')

let date = List.ofArray parts |> dateFrom

match hoursAndMinutesFrom parts.[parts.Length - 1] with
| Some(hours, minutes) ->
let dateTime = date.AddHours(hours).AddMinutes(minutes)
{ Date = dateTime ; Time = Some(dateTime) }
| None -> { Date = date ; Time = None }
4 changes: 4 additions & 0 deletions MeetupParser/packages.config
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
<?xml version="1.0" encoding="utf-8"?>
<packages>
<package id="MimeKitLite" version="0.36.0.0" targetFramework="net45" />
</packages>
Loading

0 comments on commit 79ac620

Please sign in to comment.