diff --git a/Pipfile b/Pipfile new file mode 100755 index 0000000..595a01f --- /dev/null +++ b/Pipfile @@ -0,0 +1,27 @@ +[[source]] +name = "pypi" +url = "https://pypi.org/simple" +verify_ssl = true + +[dev-packages] + +[packages] +kiwisolver = ">=1.0.1" +lxml = ">=4.2.3" +matplotlib = ">=2.2.2" +numpy = ">=1.14.5" +pandas = ">=0.23.3" +pyopenms = ">=2.3.0.4" +pyparsing = ">=2.2.0" +pyteomics = ">=3.5.1" +python-dateutil = ">=2.7.3" +pytz = ">=2018.5" +scipy = ">=1.1.0" +seaborn = ">=0.9.0" +six = ">=1.11.0" +subprocess32 = ">=3.5.2" +"backports.functools_lru_cache" = ">=1.5" +Cycler = ">=0.10.0" + +[requires] +python_version = "3.7" diff --git a/Pipfile.lock b/Pipfile.lock new file mode 100755 index 0000000..2058dcb --- /dev/null +++ b/Pipfile.lock @@ -0,0 +1,267 @@ +{ + "_meta": { + "hash": { + "sha256": "4c7eaca64c1e2d03c4e81329915936b0f9a7d38392910fe4cfcd4ba2bba3e59a" + }, + "pipfile-spec": 6, + "requires": { + "python_version": "3.7" + }, + "sources": [ + { + "name": "pypi", + "url": "https://pypi.org/simple", + "verify_ssl": true + } + ] + }, + "default": { + "backports.functools-lru-cache": { + "hashes": [ + "sha256:9d98697f088eb1b0fa451391f91afb5e3ebde16bbdb272819fd091151fda4f1a", + "sha256:f0b0e4eba956de51238e17573b7087e852dfe9854afd2e9c873f73fc0ca0a6dd" + ], + "index": "pypi", + "version": "==1.5" + }, + "cycler": { + "hashes": [ + "sha256:1d8a5ae1ff6c5cf9b93e8811e581232ad8920aeec647c37316ceac982b08cb2d", + "sha256:cd7b2d1018258d7247a71425e9f26463dfb444d411c39569972f4ce586b0c9d8" + ], + "index": "pypi", + "version": "==0.10.0" + }, + "kiwisolver": { + "hashes": [ + "sha256:05b5b061e09f60f56244adc885c4a7867da25ca387376b02c1efc29cc16bcd0f", + "sha256:26f4fbd6f5e1dabff70a9ba0d2c4bd30761086454aa30dddc5b52764ee4852b7", + "sha256:3b2378ad387f49cbb328205bda569b9f87288d6bc1bf4cd683c34523a2341efe", + "sha256:400599c0fe58d21522cae0e8b22318e09d9729451b17ee61ba8e1e7c0346565c", + "sha256:47b8cb81a7d18dbaf4fed6a61c3cecdb5adec7b4ac292bddb0d016d57e8507d5", + "sha256:53eaed412477c836e1b9522c19858a8557d6e595077830146182225613b11a75", + "sha256:58e626e1f7dfbb620d08d457325a4cdac65d1809680009f46bf41eaf74ad0187", + "sha256:5a52e1b006bfa5be04fe4debbcdd2688432a9af4b207a3f429c74ad625022641", + "sha256:5c7ca4e449ac9f99b3b9d4693debb1d6d237d1542dd6a56b3305fe8a9620f883", + "sha256:682e54f0ce8f45981878756d7203fd01e188cc6c8b2c5e2cf03675390b4534d5", + "sha256:79bfb2f0bd7cbf9ea256612c9523367e5ec51d7cd616ae20ca2c90f575d839a2", + "sha256:7f4dd50874177d2bb060d74769210f3bce1af87a8c7cf5b37d032ebf94f0aca3", + "sha256:8944a16020c07b682df861207b7e0efcd2f46c7488619cb55f65882279119389", + "sha256:8aa7009437640beb2768bfd06da049bad0df85f47ff18426261acecd1cf00897", + "sha256:939f36f21a8c571686eb491acfffa9c7f1ac345087281b412d63ea39ca14ec4a", + "sha256:9733b7f64bd9f807832d673355f79703f81f0b3e52bfce420fc00d8cb28c6a6c", + "sha256:a02f6c3e229d0b7220bd74600e9351e18bc0c361b05f29adae0d10599ae0e326", + "sha256:a0c0a9f06872330d0dd31b45607197caab3c22777600e88031bfe66799e70bb0", + "sha256:acc4df99308111585121db217681f1ce0eecb48d3a828a2f9bbf9773f4937e9e", + "sha256:b64916959e4ae0ac78af7c3e8cef4becee0c0e9694ad477b4c6b3a536de6a544", + "sha256:d3fcf0819dc3fea58be1fd1ca390851bdb719a549850e708ed858503ff25d995", + "sha256:d52e3b1868a4e8fd18b5cb15055c76820df514e26aa84cc02f593d99fef6707f", + "sha256:db1a5d3cc4ae943d674718d6c47d2d82488ddd94b93b9e12d24aabdbfe48caee", + "sha256:e3a21a720791712ed721c7b95d433e036134de6f18c77dbe96119eaf7aa08004", + "sha256:e8bf074363ce2babeb4764d94f8e65efd22e6a7c74860a4f05a6947afc020ff2", + 
"sha256:f16814a4a96dc04bf1da7d53ee8d5b1d6decfc1a92a63349bb15d37b6a263dd9", + "sha256:f2b22153870ca5cf2ab9c940d7bc38e8e9089fa0f7e5856ea195e1cf4ff43d5a", + "sha256:f790f8b3dff3d53453de6a7b7ddd173d2e020fb160baff578d578065b108a05f" + ], + "index": "pypi", + "version": "==1.1.0" + }, + "lxml": { + "hashes": [ + "sha256:06c7616601430aa140a69f97e3116308fffe0848f543b639a5ec2e8920ae72fd", + "sha256:177202792f9842374a8077735c69c41a4282183f7851443d2beb8ee310720819", + "sha256:19317ad721ceb9e39847d11131903931e2794e447d4751ebb0d9236f1b349ff2", + "sha256:36d206e62f3e5dbaafd4ec692b67157e271f5da7fd925fda8515da675eace50d", + "sha256:387115b066c797c85f9861a9613abf50046a15aac16759bc92d04f94acfad082", + "sha256:3ce1c49d4b4a7bc75fb12acb3a6247bb7a91fe420542e6d671ba9187d12a12c2", + "sha256:4d2a5a7d6b0dbb8c37dab66a8ce09a8761409c044017721c21718659fa3365a1", + "sha256:58d0a1b33364d1253a88d18df6c0b2676a1746d27c969dc9e32d143a3701dda5", + "sha256:62a651c618b846b88fdcae0533ec23f185bb322d6c1845733f3123e8980c1d1b", + "sha256:69ff21064e7debc9b1b1e2eee8c2d686d042d4257186d70b338206a80c5bc5ea", + "sha256:7060453eba9ba59d821625c6af6a266bd68277dce6577f754d1eb9116c094266", + "sha256:7d26b36a9c4bce53b9cfe42e67849ae3c5c23558bc08363e53ffd6d94f4ff4d2", + "sha256:83b427ad2bfa0b9705e02a83d8d607d2c2f01889eb138168e462a3a052c42368", + "sha256:923d03c84534078386cf50193057aae98fa94cace8ea7580b74754493fda73ad", + "sha256:b773715609649a1a180025213f67ffdeb5a4878c784293ada300ee95a1f3257b", + "sha256:baff149c174e9108d4a2fee192c496711be85534eab63adb122f93e70aa35431", + "sha256:bca9d118b1014b4c2d19319b10a3ebed508ff649396ce1855e1c96528d9b2fa9", + "sha256:ce580c28845581535dc6000fc7c35fdadf8bea7ccb57d6321b044508e9ba0685", + "sha256:d34923a569e70224d88e6682490e24c842907ba2c948c5fd26185413cbe0cd96", + "sha256:dd9f0e531a049d8b35ec5e6c68a37f1ba6ec3a591415e6804cbdf652793d15d7", + "sha256:ecb805cbfe9102f3fd3d2ef16dfe5ae9e2d7a7dfbba92f4ff1e16ac9784dbfb0", + "sha256:ede9aad2197a0202caff35d417b671f5f91a3631477441076082a17c94edd846", + "sha256:ef2d1fc370400e0aa755aab0b20cf4f1d0e934e7fd5244f3dd4869078e4942b9", + "sha256:f2fec194a49bfaef42a548ee657362af5c7a640da757f6f452a35da7dd9f923c" + ], + "index": "pypi", + "version": "==4.3.4" + }, + "matplotlib": { + "hashes": [ + "sha256:1febd22afe1489b13c6749ea059d392c03261b2950d1d45c17e3aed812080c93", + "sha256:31a30d03f39528c79f3a592857be62a08595dec4ac034978ecd0f814fa0eec2d", + "sha256:4442ce720907f67a79d45de9ada47be81ce17e6c2f448b3c64765af93f6829c9", + "sha256:796edbd1182cbffa7e1e7a97f1e141f875a8501ba8dd834269ae3cd45a8c976f", + "sha256:934e6243df7165aad097572abf5b6003c77c9b6c480c3c4de6f2ef1b5fdd4ec0", + "sha256:bab9d848dbf1517bc58d1f486772e99919b19efef5dd8596d4b26f9f5ee08b6b", + "sha256:c1fe1e6cdaa53f11f088b7470c2056c0df7d80ee4858dadf6cbe433fcba4323b", + "sha256:e5b8aeca9276a3a988caebe9f08366ed519fff98f77c6df5b64d7603d0e42e36", + "sha256:ec6bd0a6a58df3628ff269978f4a4b924a0d371ad8ce1f8e2b635b99e482877a" + ], + "index": "pypi", + "version": "==3.1.1" + }, + "numpy": { + "hashes": [ + "sha256:0778076e764e146d3078b17c24c4d89e0ecd4ac5401beff8e1c87879043a0633", + "sha256:141c7102f20abe6cf0d54c4ced8d565b86df4d3077ba2343b61a6db996cefec7", + "sha256:14270a1ee8917d11e7753fb54fc7ffd1934f4d529235beec0b275e2ccf00333b", + "sha256:27e11c7a8ec9d5838bc59f809bfa86efc8a4fd02e58960fa9c49d998e14332d5", + "sha256:2a04dda79606f3d2f760384c38ccd3d5b9bb79d4c8126b67aff5eb09a253763e", + "sha256:3c26010c1b51e1224a3ca6b8df807de6e95128b0908c7e34f190e7775455b0ca", + "sha256:52c40f1a4262c896420c6ea1c6fda62cf67070e3947e3307f5562bd783a90336", + 
"sha256:6e4f8d9e8aa79321657079b9ac03f3cf3fd067bf31c1cca4f56d49543f4356a5", + "sha256:7242be12a58fec245ee9734e625964b97cf7e3f2f7d016603f9e56660ce479c7", + "sha256:7dc253b542bfd4b4eb88d9dbae4ca079e7bf2e2afd819ee18891a43db66c60c7", + "sha256:94f5bd885f67bbb25c82d80184abbf7ce4f6c3c3a41fbaa4182f034bba803e69", + "sha256:a89e188daa119ffa0d03ce5123dee3f8ffd5115c896c2a9d4f0dbb3d8b95bfa3", + "sha256:ad3399da9b0ca36e2f24de72f67ab2854a62e623274607e37e0ce5f5d5fa9166", + "sha256:b0348be89275fd1d4c44ffa39530c41a21062f52299b1e3ee7d1c61f060044b8", + "sha256:b5554368e4ede1856121b0dfa35ce71768102e4aa55e526cb8de7f374ff78722", + "sha256:cbddc56b2502d3f87fda4f98d948eb5b11f36ff3902e17cb6cc44727f2200525", + "sha256:d79f18f41751725c56eceab2a886f021d70fd70a6188fd386e29a045945ffc10", + "sha256:dc2ca26a19ab32dc475dbad9dfe723d3a64c835f4c23f625c2b6566ca32b9f29", + "sha256:dd9bcd4f294eb0633bb33d1a74febdd2b9018b8b8ed325f861fffcd2c7660bb8", + "sha256:e8baab1bc7c9152715844f1faca6744f2416929de10d7639ed49555a85549f52", + "sha256:ec31fe12668af687b99acf1567399632a7c47b0e17cfb9ae47c098644ef36797", + "sha256:f12b4f7e2d8f9da3141564e6737d79016fe5336cc92de6814eba579744f65b0a", + "sha256:f58ac38d5ca045a377b3b377c84df8175ab992c970a53332fa8ac2373df44ff7" + ], + "index": "pypi", + "version": "==1.16.4" + }, + "pandas": { + "hashes": [ + "sha256:071e42b89b57baa17031af8c6b6bbd2e9a5c68c595bc6bf9adabd7a9ed125d3b", + "sha256:17450e25ae69e2e6b303817bdf26b2cd57f69595d8550a77c308be0cd0fd58fa", + "sha256:17916d818592c9ec891cbef2e90f98cc85e0f1e89ed0924c9b5220dc3209c846", + "sha256:2538f099ab0e9f9c9d09bbcd94b47fd889bad06dc7ae96b1ed583f1dc1a7a822", + "sha256:366f30710172cb45a6b4f43b66c220653b1ea50303fbbd94e50571637ffb9167", + "sha256:42e5ad741a0d09232efbc7fc648226ed93306551772fc8aecc6dce9f0e676794", + "sha256:4e718e7f395ba5bfe8b6f6aaf2ff1c65a09bb77a36af6394621434e7cc813204", + "sha256:4f919f409c433577a501e023943e582c57355d50a724c589e78bc1d551a535a2", + "sha256:4fe0d7e6438212e839fc5010c78b822664f1a824c0d263fd858f44131d9166e2", + "sha256:5149a6db3e74f23dc3f5a216c2c9ae2e12920aa2d4a5b77e44e5b804a5f93248", + "sha256:627594338d6dd995cfc0bacd8e654cd9e1252d2a7c959449228df6740d737eb8", + "sha256:83c702615052f2a0a7fb1dd289726e29ec87a27272d775cb77affe749cca28f8", + "sha256:8c872f7fdf3018b7891e1e3e86c55b190e6c5cee70cab771e8f246c855001296", + "sha256:90f116086063934afd51e61a802a943826d2aac572b2f7d55caaac51c13db5b5", + "sha256:a3352bacac12e1fc646213b998bce586f965c9d431773d9e91db27c7c48a1f7d", + "sha256:bcdd06007cca02d51350f96debe51331dec429ac8f93930a43eb8fb5639e3eb5", + "sha256:c1bd07ebc15285535f61ddd8c0c75d0d6293e80e1ee6d9a8d73f3f36954342d0", + "sha256:c9a4b7c55115eb278c19aa14b34fcf5920c8fe7797a09b7b053ddd6195ea89b3", + "sha256:cc8fc0c7a8d5951dc738f1c1447f71c43734244453616f32b8aa0ef6013a5dfb", + "sha256:d7b460bc316064540ce0c41c1438c416a40746fd8a4fb2999668bf18f3c4acf1" + ], + "index": "pypi", + "version": "==0.24.2" + }, + "pyopenms": { + "hashes": [ + "sha256:611669bddb6653ec05fffbb141363fbe908baf6f5f254067562e8013667ca6b3", + "sha256:6b3c2907f8eb6be4436f41b8b2bc5e7e6bb02bac3a689e0b2cb1032b747cfcf1", + "sha256:814889a889a16fee614e9158e84d7613a32a85de91a5c1aa614b5fa6e5a68353", + "sha256:8b76877c1c4a1d90034996f6500bbc5ece3da6388f206b73b3e11f496df66f0e", + "sha256:913d39a5c0aaf73d34b53e01228543c2cde60c6cb2164edcd65e61751afca773", + "sha256:9348b132b65b1200a2259685a161649e7190dce7ea084f93ea820d46f89bba8f", + "sha256:97cdb38529cc89e346d209fcaf947dc89ac97a972224dfdcd2dc02fd4a82abad", + "sha256:b37bdb071708a86366a29305c5c485b96a8dc5ac0cf2766b86eb5c740e2aaf71", + 
"sha256:c962f86f3e29785d770317554a3eebeb7c612ae0eab67be4ec783336b6d95766", + "sha256:d9d8a0c629551d6c0326ce5a4b7354e7bf9c44f63e765fc18b2db5191d702e4f", + "sha256:f75622c9f92c056faa685311c0e754e4ace6b7745dead9c756de25d8ccfff106", + "sha256:f96e206d58a61ea68419160167c1702ddc9d7beffa6b7a725ea8e3435d007f0e" + ], + "index": "pypi", + "version": "==2.4.0" + }, + "pyparsing": { + "hashes": [ + "sha256:1873c03321fc118f4e9746baf201ff990ceb915f433f23b395f5580d1840cb2a", + "sha256:9b6323ef4ab914af344ba97510e966d64ba91055d6b9afa6b30799340e89cc03" + ], + "index": "pypi", + "version": "==2.4.0" + }, + "pyteomics": { + "hashes": [ + "sha256:3c72ca6e521c0f2183d1c002935e5662b857711425179dc30038665c38fc2a53", + "sha256:8eef7f325bb818fe403e3c0859b36eecd22efbd4012d7610f51479d88ced72a2" + ], + "index": "pypi", + "version": "==4.1.2" + }, + "python-dateutil": { + "hashes": [ + "sha256:7e6584c74aeed623791615e26efd690f29817a27c73085b78e4bad02493df2fb", + "sha256:c89805f6f4d64db21ed966fda138f8a5ed7a4fdbc1a8ee329ce1b74e3c74da9e" + ], + "index": "pypi", + "version": "==2.8.0" + }, + "pytz": { + "hashes": [ + "sha256:303879e36b721603cc54604edcac9d20401bdbe31e1e4fdee5b9f98d5d31dfda", + "sha256:d747dd3d23d77ef44c6a3526e274af6efeb0a6f1afd5a69ba4d5be4098c8e141" + ], + "index": "pypi", + "version": "==2019.1" + }, + "scipy": { + "hashes": [ + "sha256:03b1e0775edbe6a4c64effb05fff2ce1429b76d29d754aa5ee2d848b60033351", + "sha256:09d008237baabf52a5d4f5a6fcf9b3c03408f3f61a69c404472a16861a73917e", + "sha256:10325f0ffac2400b1ec09537b7e403419dcd25d9fee602a44e8a32119af9079e", + "sha256:1db9f964ed9c52dc5bd6127f0dd90ac89791daa690a5665cc01eae185912e1ba", + "sha256:409846be9d6bdcbd78b9e5afe2f64b2da5a923dd7c1cd0615ce589489533fdbb", + "sha256:4907040f62b91c2e170359c3d36c000af783f0fa1516a83d6c1517cde0af5340", + "sha256:6c0543f2fdd38dee631fb023c0f31c284a532d205590b393d72009c14847f5b1", + "sha256:826b9f5fbb7f908a13aa1efd4b7321e36992f5868d5d8311c7b40cf9b11ca0e7", + "sha256:a7695a378c2ce402405ea37b12c7a338a8755e081869bd6b95858893ceb617ae", + "sha256:a84c31e8409b420c3ca57fd30c7589378d6fdc8d155d866a7f8e6e80dec6fd06", + "sha256:adadeeae5500de0da2b9e8dd478520d0a9945b577b2198f2462555e68f58e7ef", + "sha256:b283a76a83fe463c9587a2c88003f800e08c3929dfbeba833b78260f9c209785", + "sha256:c19a7389ab3cd712058a8c3c9ffd8d27a57f3d84b9c91a931f542682bb3d269d", + "sha256:c3bb4bd2aca82fb498247deeac12265921fe231502a6bc6edea3ee7fe6c40a7a", + "sha256:c5ea60ece0c0c1c849025bfc541b60a6751b491b6f11dd9ef37ab5b8c9041921", + "sha256:db61a640ca20f237317d27bc658c1fc54c7581ff7f6502d112922dc285bdabee" + ], + "index": "pypi", + "version": "==1.3.0" + }, + "seaborn": { + "hashes": [ + "sha256:42e627b24e849c2d3bbfd059e00005f6afbc4a76e4895baf44ae23fe8a4b09a5", + "sha256:76c83f794ca320fb6b23a7c6192d5e185a5fcf4758966a0c0a54baee46d41e2f" + ], + "index": "pypi", + "version": "==0.9.0" + }, + "six": { + "hashes": [ + "sha256:3350809f0555b11f552448330d0b52d5f24c91a322ea4a15ef22629740f3761c", + "sha256:d16a0141ec1a18405cd4ce8b4613101da75da0e9a7aec5bdd4fa804d0e0eba73" + ], + "index": "pypi", + "version": "==1.12.0" + }, + "subprocess32": { + "hashes": [ + "sha256:88e37c1aac5388df41cc8a8456bb49ebffd321a3ad4d70358e3518176de3a56b", + "sha256:eb2937c80497978d181efa1b839ec2d9622cf9600a039a79d0e108d1f9aec79d" + ], + "index": "pypi", + "version": "==3.5.4" + } + }, + "develop": {} +} diff --git a/ProteoFileReader.py b/ProteoFileReader.py index 6d7cbb6..dd816cd 100755 --- a/ProteoFileReader.py +++ b/ProteoFileReader.py @@ -13,6 +13,125 @@ import numpy as np import sys import pyopenms as oms 
+import io +import os + + +def read_mgf(mgf_file): + mgf_reader = io.open(mgf_file, "r") + + spectra = [] + peaks = [] + RT, pep_mz, pep_int, charge, scanID, ms2id = -1, -1, -1, -1, -1, -1 + title, detector, fragmethod = "", "", "" + peak_re = re.compile(r'^([0-9.e+\-]+)\s([0-9.e+\-]+)') + + for line in mgf_reader: + if len(line.strip()) == 0: + continue + if re.match(peak_re, line): + mz_int_list = re.match(peak_re, line).groups() + peaks.append([float(x) for x in mz_int_list]) + elif line.startswith("TITLE"): + title = line.replace('TITLE=', '').strip() + # scan_match = re.search("^TITLE=[^.]*.([0-9]+).", line) + # ms2id_scan_match = re.search("ms2_scanId=([0-9]+)", line) + # scanID = int(scan_match.groups()[0]) + # if ms2id_scan_match: + # ms2id = int(ms2id_scan_match.groups()[0]) + # else: + # ms2id = -1 + + elif line.startswith("RTINSECONDS"): + RT = float(re.search("RTINSECONDS=(.*)", line).groups()[0]) + + elif line.startswith("PEPMASS"): + precursor = re.search("PEPMASS=(.*)", line).groups()[0].split() + pep_mz = float(precursor[0]) + try: + pep_int = float(precursor[1]) + except: + pep_int = -1.0 + + elif line.startswith("CHARGE"): + charge = float(re.search("CHARGE=(-?\d)", line).groups()[0]) + + elif line.startswith("DETECTOR"): + detector = re.search("DETECTOR=(.*)", line).groups()[0].strip() + + elif line.startswith("FRAGMETHOD"): + fragmethod = re.search("FRAGMETHOD=(.*)", line).groups()[0].strip() + + elif "END IONS" in line: + spectra.append( + MS2_spectrum(title, RT, pep_mz, pep_int, charge, peaks, detector=detector, fragmethod=fragmethod) + ) + peaks = [] + title, detector, fragmethod = "", "", "" + RT, pep_mz, pep_int, charge, scanID, ms2id = -1, -1, -1, -1, -1, -1 + + return spectra + + +def write_mgf(spectra, outfile): + out_writer = open(os.path.join(outfile), "w") + out_writer.write('MASS=Monoisotopic\n') + for spectrum in spectra: + title = spectrum.getTitle() + # scan = re.search('scan=[0-9]*', title).group(0)[5:] + # try: + # title = re.match('([A-Z])[0-9]{6}_[0-9]{2}.+?( )', title).group(0)[:-1] + # except AttributeError: + # title = re.match('[0-9]{8}_[0-9]{2}.+?( )', title).group(0)[:-1] + # title = '.'.join([title, scan, scan, str(int(spectrum.charge))]) + if 'ms2_scanId' in spectrum.getTitle(): + try: + ms2_parent = re.search('ms2_scanId=.*scan=([0-9]+)', spectrum.getTitle()).groups()[0] + except AttributeError: + ms2_parent = 0 + title += ' ms2_scanId=%s' % ms2_parent + stavrox_mgf = """ +BEGIN IONS +TITLE={} +PEPMASS={} {} +CHARGE={}+ +RTINSECONDS={} +DETECTOR={} +FRAGMETHOD={} +{} +END IONS""".format( + title, + spectrum.getPrecursorMZ(), + spectrum.getPrecursorIntensity() if spectrum.getPrecursorIntensity() > 0 else 0, + int(spectrum.charge), + spectrum.getRT(), + spectrum.getDetector(), + spectrum.getFragMethod(), + "\n".join([f"{mz} {i}" for mz, i in spectrum.peaks if i > 0]) + ) + out_writer.write(stavrox_mgf) + + +def split_mgf_methods(mgf_in_file): + ms2_spectra = read_mgf(mgf_in_file) + + methods = [ + "CID", + "HCD", + "ETD", + "ETciD", + "EThcD" + ] + + for method in methods: + split_spectra = [spectrum for spectrum in ms2_spectra if spectrum.getFragMethod() == method] + + if len(split_spectra) > 0: + out_file_name = '%s_%s' % (method, os.path.split(mgf_in_file)[1]) + out_file_path = os.path.join(os.path.split(mgf_in_file)[0], out_file_name) + + write_mgf(split_spectra, out_file_path) + def mzMLReader(in_file): """ @@ -26,7 +145,8 @@ def mzMLReader(in_file): file = oms.MzMLFile() exp = oms.MSExperiment() file.load(in_file, exp) - return(exp) + 
return exp + class MS2_spectrum(): """ @@ -50,64 +170,71 @@ class MS2_spectrum(): charge array for the peaks """ - def __init__(self, title, RT, pepmass, pepint, charge, peaks, peakcharge=[]): + def __init__(self, title, RT, pepmz, pepint, charge, peaks, peakcharge=[], fragmethod='', detector=''): self.title = title self.RT = RT - self.pepmz = pepmass + self.pepmz = pepmz self.pepint = pepint self.charge = charge self.peaks = peaks self.peakcharge = peakcharge + self.fragMethod = fragmethod + self.detector = detector def getPrecursorMZ(self): """ Returns the precursor mass """ - return(self.pepmz) + return self.pepmz def getPrecursorIntensity(self): """ Returns the precursor intensity """ - return(self.pepint) + return self.pepint def getRT(self): """ Returns the precursor RT """ - return(self.RT) + return self.RT def getTitle(self): """ Returns the precursor mass """ - return(self.title) + return self.title def getPeaks(self): """ Returns the spectrum peaks """ - return(self.peaks) + return self.peaks def getMZ(self): """ Returns the mz of the MS2 """ - return(self.peaks[:,0]) + return self.peaks[:, 0] def getIntensities(self): """ Returns the MS2 peak intensities """ - return(self.peaks[:,1]) + return self.peaks[:, 1] def getUnchargedMass(self): """ Computs the uncharged mass of a fragment: - uncharged_mass = (mz * z ) - z - TODO: fix Hydrogen mass! + uncharged_mass = (mz - hydrogen mass) * z """ - return( (self.pepmass * self.charge) - self.charge) + return (self.pepmz - 1.007276466879) * self.charge + + def getFragMethod(self): + return self.fragMethod + + def getDetector(self): + return self.detector def printf(self): print ("Title, RT, PEPMASS, PEPINT, CHARGE") @@ -117,7 +244,7 @@ def to_mgf(self): # need dummy values in case no peak charges are in the data if len(self.peakcharge) == 0: self.peakcharge = [""]*self.peaks.shape[0] - mgf_str=""" + mgf_str = """ BEGIN IONS TITLE=%s RTINSECONDS=%s @@ -126,7 +253,7 @@ def to_mgf(self): %s END IONS """ % (self.title, self.RT, self.pepmz, self.pepint, self.charge, "\r\n".join(["%s %s %s" % (i[0], i[1], j, ) for i,j in zip(self.peaks, self.peakcharge)])) - return(mgf_str) + return mgf_str #============================================================================== @@ -343,6 +470,6 @@ def store(self, out_file, ms_list): charge=%s %s peaklist end -""" % (ms.title, ms.pepmass, ms.charge, "\r\n".join(["%s %s" % (i, j ) for i,j in ms.peaks])) +""" % (ms.title, ms.pepmass, ms.charge, "\r\n".join(["%s %s" % (i, j) for i,j in ms.peaks])) out_mgf.write(mgf_str) out_mgf.close() diff --git a/gui.py b/gui.py deleted file mode 100755 index 36ad1b8..0000000 --- a/gui.py +++ /dev/null @@ -1,39 +0,0 @@ -import os -import preprocessing -from multiprocessing import Pool -from functools import partial -import sys - -sys.path.append('D:/software/wxpython/wx-3.0-msw') -from gooey import Gooey, GooeyParser - - -@Gooey( - program_description="Converts raw files to mgfs whith optional MS2 denoising.", - default_size=(610, 400)) -# if __name__ == '__main__': -def main(): - parser = GooeyParser() - parser.add_argument('input', widget="DirChooser") - parser.add_argument('output', widget="DirChooser") - parser.add_argument('config', widget="FileChooser") - - args = parser.parse_args() - - full_paths = [os.path.join(args.input, rawfile) for rawfile in os.listdir(args.input)] - full_paths = [x for x in full_paths if not os.path.isdir(x)] - - if args.input == args.output: - args.output = os.path.join(args.output, 'processed') - - execfile(args.config, globals()) - - 
pool = Pool(processes=int(nthr)) - pool.map(partial(preprocessing.process_file, outdir=args.output, mscon_settings=mscon_settings, split_acq=split_acq, - detector_filter=detector_filter, mscon_exe=msconvert_exe), full_paths) - pool.close() - pool.join() - - -if __name__ == '__main__': - main() diff --git a/mass_recal.py b/mass_recal.py index b159e78..466ad2e 100755 --- a/mass_recal.py +++ b/mass_recal.py @@ -5,7 +5,6 @@ import numpy as np import pandas as pd import ProteoFileReader -import sys def xi_wrapper(arguments): @@ -51,7 +50,7 @@ def get_ppm_error(xi_df, outfile): err = input('Enter error to correct by (0 for no correction):\n') try: err = float(err) - if (err != 0): + if err != 0: return err elif err == 0: return 0 @@ -61,50 +60,22 @@ def get_ppm_error(xi_df, outfile): return median_err -def adjust_prec_mz(mgf_file, error, outpath): - outfile = os.path.join(outpath, 'recal_' + os.path.split(mgf_file)[1]) - if not os.path.exists(outpath): - os.makedirs(outpath) +def adjust_prec_mz(mgf_file, error, out_path): + outfile = os.path.join(out_path, 'recal_' + os.path.split(mgf_file)[1]) + if not os.path.exists(out_path): + os.makedirs(out_path) elif os.path.isfile(outfile): - return - exp = ProteoFileReader.MGF_Reader() - exp.load(mgf_file) - - out_writer = open(os.path.join(outfile), "w") - for spectrum in exp: - prec_mz_new = spectrum.getPrecursorMass()/(1-error/10.**6) - if sys.version_info.major < 3: - stavrox_mgf = """ -MASS=Monoisotopic -BEGIN IONS -TITLE={} -PEPMASS={} {} -CHARGE={}+ -RTINSECONDS={} -{} -END IONS """.format(spectrum.getTitle(), - prec_mz_new, spectrum.getPrecursorIntensity() if spectrum.getPrecursorIntensity() > 0 else 0, - int(spectrum.charge), spectrum.getRT(), - "\n".join(["%s %s" % (i[0], i[1]) for i in spectrum.peaks if i[1] > 0])) - else: - stavrox_mgf = """ -MASS=Monoisotopic -BEGIN IONS -TITLE={} -PEPMASS={} {} -CHARGE={}+ -RTINSECONDS={} -{} -END IONS """.format(spectrum.getTitle(), - prec_mz_new, - spectrum.getPrecursorIntensity() if spectrum.getPrecursorIntensity() > 0 else 0, - int(spectrum.charge), spectrum.getRT(), - "\n".join(["%s %s" % (mz, spectrum.peaks[1][i]) for i, mz in enumerate(spectrum.peaks[0]) if - spectrum.peaks[1][i] > 0])) - out_writer.write(stavrox_mgf) - - -def main(mgf, fasta, xi_cnf, outpath, threads, xi_jar='./resources/XiSearch_1.6.739.jar', val_input=None): + raise Exception('File %s already exists!' % outfile) + + ms2_spectra = ProteoFileReader.read_mgf(mgf_file) + + for spectrum in ms2_spectra: + spectrum.pepmz = spectrum.getPrecursorMZ() / (1 - error / 10.0 ** 6) + + ProteoFileReader.write_mgf(ms2_spectra, outfile) + + +def main(mgf, fasta, xi_cnf, outpath, threads, xi_jar='./resources/XiSearch_1.6.745.jar', val_input=None): if not os.path.exists(outpath): os.makedirs(outpath) @@ -124,8 +95,8 @@ def main(mgf, fasta, xi_cnf, outpath, threads, xi_jar='./resources/XiSearch_1.6. 
ms1_input = pd.read_csv(val_input, header=None, index_col=0) ms1_err = ms1_input[ms1_input.index.str.contains('_'.join(filename.split('_')[1:]))].values[0][0] - if ms1_err is not None: # shift all old m/z by value - adjust_prec_mz(mgf_file=mgf, error=ms1_err, outpath=os.path.join(outpath)) + if ms1_err is not None: # shift all old m/z by value + adjust_prec_mz(mgf_file=mgf, error=ms1_err, out_path=os.path.join(outpath)) if __name__ == '__main__': diff --git a/mass_recal_ms2.py b/mass_recal_ms2.py new file mode 100755 index 0000000..32c0a44 --- /dev/null +++ b/mass_recal_ms2.py @@ -0,0 +1,127 @@ +import os +import subprocess +import matplotlib.pyplot as plt +import seaborn as sns +import numpy as np +import pandas as pd +import ProteoFileReader + + +def xi_wrapper(arguments): + xi = subprocess.Popen(arguments) + xi.communicate() + return + + +def run_xi_lin(peakfile, fasta, cnf, outpath, xipath, threads='1'): + if not os.path.exists(outpath): + os.makedirs(outpath) + elif os.path.isfile(outpath + '/xi_' + os.path.split(peakfile)[1].replace('.mgf', '.csv')): + return + + xi_cmds = ['java', '-cp', os.path.join(os.path.dirname(os.path.realpath(__file__)), xipath), + 'rappsilber.applications.Xi', # + '/fastutil-8.1.0.jar;' + xipath + '/XiSearch.jar' + '--fasta=' + fasta, + '--xiconf=UseCPUs:' + threads, + '--peaks=' + peakfile, + '--config=' + cnf, + '--output=' + outpath + '/xi_' + os.path.split(peakfile)[1].replace('.mgf', '.csv'), + '--peaksout=%s_peaks.csv.gz' % peakfile[:len(peakfile) - 4]] + + print('calling ' + subprocess.list2cmdline(xi_cmds)) + xi = subprocess.Popen(xi_cmds) #, stdout=subprocess.PIPE, stderr=subprocess.PIPE) + xi.communicate() + + +def get_ppm_error(xi_df, peaks_df, outfile): + xi_df = xi_df[(xi_df.decoy == 0) & (xi_df['match score'] > 6)] + median_err = np.median(xi_df['Precoursor Error']) + try: + fig, ax = plt.subplots() + sns.distplot(xi_df['Precoursor Error'], norm_hist=False, kde=False) + ax.axvline(median_err) + plt.savefig(outfile) + plt.close() + except ZeroDivisionError: + print(xi_df['Precoursor Error'][:5]) + + if len(xi_df) < 75: + print(os.path.split(outfile)[1] + ': Only %s PSMs found. Median error is %s.' % (len(xi_df), median_err)) + err = input('Enter error to correct by (0 for no correction):\n') + try: + # err = float(err) + if err != '0': + return float(err), 0 + elif err == '0': + return 0, 0 + except ValueError: + return 0, 0 + + xi_ms2_df = peaks_df[peaks_df["IsPrimaryMatch"] == 1] + xi_ms2_df["MS2Error_ppm"] = (xi_ms2_df["MS2Error"] * 10. ** 6) / xi_ms2_df["CalcMZ"] + xi_ms2_df = xi_ms2_df.merge(xi_df[['Scan', 'Run', 'decoy']], + left_on=['ScanNumber', 'Run'], right_on=['Scan', 'Run'], how='inner') + xi_ms2_df = xi_ms2_df[(xi_ms2_df["MS2Error_ppm"] <= 30) & (xi_ms2_df["MS2Error_ppm"] >= -30)] + median_err_ms2 = np.median(xi_ms2_df["MS2Error_ppm"]) + + fig, ax = plt.subplots() + sns.distplot(xi_ms2_df["MS2Error_ppm"], norm_hist=False, kde=False) + ax.axvline(median_err_ms2) + plt.xlabel("mass error") + plt.title("MS2 Error distribution \n median: " + str(median_err_ms2)) + plt.ylabel("# of identifications") + plt.xlim(-20, 20) + plt.savefig(os.path.join(outfile.replace('MS1', "MS2"))) + plt.close() + + return median_err, median_err_ms2 + + +def adjust_prec_mz(mgf_file, ms1_error, ms2_error, outpath): + outfile = os.path.join(outpath, 'recal_' + os.path.split(mgf_file)[1]) + if not os.path.exists(outpath): + os.makedirs(outpath) + elif os.path.isfile(outfile): + raise Exception('File %s already exists!' 
% outfile) + ms2_spectra = ProteoFileReader.read_mgf(mgf_file) + + for spectrum in ms2_spectra: + # ms1/precursor correction + spectrum.pepmz = spectrum.getPrecursorMZ() / (1 + ms1_error / 10.0 ** 6) # TODO wrong sign if newer version + + # ms2 peak correction + ms2_peaks = spectrum.getPeaks() + for i in range(0, len(ms2_peaks)): + ms2_peaks[i][0] = ms2_peaks[i][0] / (1 + ms2_error / 10. ** 6) + + spectrum.peaks = ms2_peaks + + ProteoFileReader.write_mgf(ms2_spectra, outfile) + + +def main(mgf, fasta, xi_cnf, outpath, threads, xi_jar='./resources/XiSearch_1.6.745.jar', val_input=None): + if not os.path.exists(outpath): + os.makedirs(outpath) + + filename = os.path.split(mgf)[1] + if val_input is None: + # linear small search in Xi + run_xi_lin(peakfile=mgf, fasta=fasta, cnf=xi_cnf, outpath=os.path.join(outpath), xipath=xi_jar, threads=threads) + + xi_df = pd.read_csv(os.path.join(outpath, 'xi_' + filename.replace('.mgf', '.csv'))) + peaks_df = pd.read_csv(os.path.join(outpath, filename.replace('.mgf', '_peaks.csv.gz')), + sep='\t', index_col=False, thousands=',') + # evaluate results, get median ms1 error + ms1_err, ms2_err = get_ppm_error(xi_df=xi_df, peaks_df=peaks_df, + outfile=os.path.join(outpath, 'MS1_err_' + filename + '.png')) + + error_file = open(outpath + '/ms1_err.csv', 'a') + error_file.write(filename + ',' + str(ms1_err) + '\n') + error_file.close() + else: + ms1_input = pd.read_csv(val_input, header=None, index_col=0) + ms1_err = ms1_input[ms1_input.index.str.contains('_'.join(filename.split('_')[1:]))].values[0][0] + ms2_err = 0 # ToDo val input for ms2 error? + + if ms1_err is not None: # shift all old m/z by value + adjust_prec_mz(mgf_file=mgf, ms1_error=ms1_err, ms2_error=ms2_err, outpath=os.path.join(outpath)) diff --git a/mass_trace.py b/mass_trace.py index 0fe4530..a14b889 100755 --- a/mass_trace.py +++ b/mass_trace.py @@ -84,7 +84,7 @@ def extend_mass_mz(exp, MS1scan, mz, seed_scan, scans_masstrace, currentcount = 0 while RTdiff <= RTdiff_max: currentcount += 1 - if currentcount >= maxcount: + if currentcount >= maxcount or (MS1scan + scan_it <= 1): # print ("max nscan diff reached") # TODO: check if really sensible (data I checked seemed ok for a bit over 200) break scan_it += constant diff --git a/mps_ms1.py b/mps_ms1.py index 00eb334..9e62d4f 100755 --- a/mps_ms1.py +++ b/mps_ms1.py @@ -1,234 +1,234 @@ -import os -import pyopenms as oms -import numpy as np -from ProteoFileReader import MGF_Reader -import mass_trace -from joblib import Parallel, delayed -import re - - -def add_relaxation_mgf(mgf, mps, outfile, create_comparison=False): - # mgf_file, outdir, differences = argls[0], argls[1], argls[2] - mass_diff = 1.00335483 - # if '.mgf' in args['file']: - filename = os.path.split(mgf)[1] - # read mgf - spectra = MGF_Reader() - spectra.load(mgf) - out_writer = open(outfile, "w") - for spectrum in spectra: - # calculate mass (neglect proton bec. 
later on difference used) - regex_match = re.search('(scan=)[0-9]*', spectrum.getTitle()) - if regex_match is not None: - scan = int(regex_match.group(0).split('scan=')[1]) - else: - scan = int(spectrum.getTitle().split('.')[-2]) - # scan = int(spectrum.getTitle().split('.')[-2]) - # try: - differences = [0, 1, 2, 3, 4] #[0, -1, -2, -3, -4] - if not create_comparison: - row = mps[mps[:, 0] == scan, 1:] - if len(row) == 1: - differences = [-i for i in range(len(row[0])) if row[0][i] == 1] - elif len(row) > 1: - raise ValueError('multiple matches to scan %s' % scan) - else: - print 'scan %s not found' % scan - - mass = spectrum.getPrecursorMass() * spectrum.charge - spectra_add_mip = [str((mass + x * mass_diff) / spectrum.charge) for x in differences if x != 0] - if 0 in differences: - prec_mz = spectrum.getPrecursorMass() - else: - prec_mz = spectra_add_mip[0] - spectra_add_mip = spectra_add_mip[1:] - # except KeyError: - # differences = [-2, -1, 0] - - stavrox_mgf = """ -MASS=Monoisotopic -BEGIN IONS -TITLE={} -PEPMASS={} {} -CHARGE={}+ -RTINSECONDS={} -ADDITIONALMZ={} -{} -END IONS """.format(spectrum.getTitle(), prec_mz, - spectrum.getPrecursorIntensity() if spectrum.getPrecursorIntensity() > 0 else 0, - int(spectrum.charge), spectrum.getRT(), - ';'.join(spectra_add_mip), - "\r".join(["%s %s" % (i[0], i[1]) for i in spectrum.peaks])) - out_writer.write(stavrox_mgf) - - -def read_mzml(infile): - # init variables - mzml_file = oms.MzMLFile() - exp = oms.MSExperiment() - - # load spectra into exp - mzml_file.load(infile, exp) - return (exp) - - -def get_error(mz1, mz2, charge=None, ppm=True): - if ppm: - return abs(mz1 - mz2) / mz2 * 1e6 - else: - if charge is not None: - return (mz1 - mz2) * charge - - -def return_mps_range(intensity, mps_max): - if False: #intensity >= 10e6: - return [-1] - else: - return list(range(-1, mps_max - 1, -1)) - - -def ms1_peaks(exp, tolerance=6, mps_range=[-1, -2, -3, -4]): # mps_range=[-1, -2, -3, -4] - # loop through spectra and make count if precursor in MS2 spectrum - peaks_found = [] - n_ms2 = 0 - nspectra = exp.size() - for i, spectrum in enumerate(exp): - - if spectrum.getMSLevel() == 1: - continue - n_ms2 += 1 - if i % 10000 == 0: - print "{}/{} done..".format(i, nspectra) - - # if iidone not in matched_scans: - # continue - precursor = spectrum.getPrecursors()[0] - prec_mz, prec_charge = precursor.getMZ(), precursor.getCharge() - MS1scan = mass_trace.find_parent_MS1_scan(exp, i) - ppm_pseudo = 10 # taken from Svens script, apparently not used in function - mz_trace, scans_trace = mass_trace.extract_mass_trace(exp, MS1scan, prec_mz, prec_charge, ppm_pseudo, tolerance, 10) # test with 20 to see difference - if len(scans_trace) == 1: - tmp_ms1 = exp[MS1scan] # TODO find out why mass_trace does not find anything - else: - # try: - best_isotope_seed = np.argmax(mz_trace[:, 1]) - # except: - # pass - best_seed_spectrum = scans_trace[best_isotope_seed] - - # ms1_prev = 0 - # for j in range(i, 0, -1): - tmp_ms1 = exp[best_seed_spectrum] - - res = tmp_ms1[tmp_ms1.findNearest(prec_mz)] - if abs(res.getMZ() - prec_mz) / prec_mz <= tolerance: - prec_int = res.getIntensity() - # mps_range = return_mps_range(prec_int, mps_max) - # if prec_int >= 3e6: - # peaks_found.append([i + 1, True] + [False] * len(mps_range)) - # continue - # else: - # peaks_found.append([i + 1, True] + [True] * len(mps_range)) - # continue - theo_mip = np.array([prec_mz + (mip_i * 1.00335483) / prec_charge for mip_i in mps_range]) - mip_nearest = [tmp_ms1.findNearest(x) for x in theo_mip] - 
error = np.array([get_error(tmp_ms1[mea].getMZ(), expi, ppm=True) for expi, mea in - zip(theo_mip, mip_nearest)]) - range_found = [True if x <= tolerance else False for x in error] - if sum(range_found) == 0: - # TODO: try if sensible to not mps search these - if len(mps_range) > 2: - peaks_found.append( - # [i + 1, True] + [True] * len(mps_range) - # [i + 1, True, True, True] + [False] * (len(mps_range) - 2) - # [i + 1, True, True, True, True, False] - [i + 1, True] + [True] * (len(mps_range) - 1) + [False] - ) - else: - peaks_found.append( - [i + 1, True] + [True] * len(mps_range) - ) - continue - else: - # check for continous peaks except -1 peak - # TODO: allow gap? - found = False - lightest_peak = 1 - for i_mip in range(len(range_found), 1, -1): - if range_found[i_mip - 1] & (i_mip > lightest_peak): - lightest_peak = i_mip - if sum(range_found[:i_mip]) == len(range_found[:i_mip]): - # if i_mip == len(range_found): - # sel = [False] * (i_mip - 2) + range_found[i_mip - 2:] - # else: - # sel = [False] * (i_mip - 2) + [True] * 3 + [False] * (len(range_found) - 1 - i_mip) # 2 - # takes lightest 2 continous + existing lighter peaks, excludes heaviar - sel = [False] * (i_mip - 2) + range_found[i_mip - 2:] # 2 - peaks_found.append([i + 1, False] + sel) - found = True - break - # if no continous found take all - if not found: - if not lightest_peak == len(range_found): - peaks_found.append( - # [i + 1, True] + [True] * len(mps_range) - # [i + 1, True] + [True] * (lightest_peak + 1) + [False] * (len(range_found) - lightest_peak - 1) - [i + 1, True] + [True] * (lightest_peak) + [False] * (len(range_found) - lightest_peak) - ) - else: - peaks_found.append([i + 1, True] + [True] * len(mps_range)) - continue - - else: - print 'Precursor not found' - continue - - return np.array(peaks_found) - - -def main(mzmlfile, exp_id, setting, infoout_dir, mgf_in_dir, mgf_out_dir): - exp = read_mzml(mzmlfile) - # exp_id = mzml_file[:10] - - mps_df = ms1_peaks(exp) - np.savetxt(infoout_dir + '/%s_%s.csv' % (setting, exp_id), mps_df, delimiter=',') - corresponding_mgf = [x for x in os.listdir(mgf_in_dir) if exp_id in x][0] - add_relaxation_mgf(mgf=mgf_in_dir + corresponding_mgf, mps=mps_df, - outfile=mgf_out_dir + '/%s_' % setting + corresponding_mgf) - -if __name__ == '__main__': - isotope_diff = 1.00335483 - # mzml_dir = 'D:/user/Swantje/data/PC/mzML/' - # chaet_dir = 'fr7-10' - # mzml_dir = 'D:/user/Swantje/data/Chaetomium/frac7_10/mzML/' - mgf_filtered_dir = 'D:/user/Swantje/projects/pipeline_prepro_xi_fdr/lars_PC_4frag_BS3_Lumos/All_prepro_peakfiles/mscon_PF_20_100_0/' - # mgf_filtered_dir = 'D:/user/Swantje/projects/pipeline_prepro_xi_fdr/chaetomium/%s/All_prepro_peakfiles/mscon_PF_20/' % chaet_dir - setting_name = 'decoy_pos4_only' - mgf_out = 'D:/user/Swantje/projects/pipeline_prepro_xi_fdr/lars_PC_4frag_BS3_Lumos/All_prepro_peakfiles/' + setting_name - info_out = 'D:/user/Swantje/projects/pipeline_prepro_xi_fdr/lars_PC_4frag_BS3_Lumos/relaxation_tbls/' + setting_name - # mgf_out = 'D:/user/Swantje/projects/pipeline_prepro_xi_fdr/chaetomium/%s/All_prepro_peakfiles/' % chaet_dir + setting_name - # info_out = 'D:/user/Swantje/projects/pipeline_prepro_xi_fdr/chaetomium/%s/relaxation_tbls/' % chaet_dir + setting_name - - if not os.path.exists(mgf_out): - os.makedirs(mgf_out) - if not os.path.exists(info_out): - os.makedirs(info_out) - - # mzmls_in = [x for x in os.listdir(mzml_dir) if '.mzML' in x] - # Parallel(n_jobs=4)(delayed(main)(mzml_dir + x, x[:10], setting_name, info_out, mgf_filtered_dir, - # 
mgf_out) for x in mzmls_in) - # for x in mzmls_in: - # main(mzml_dir + x, x[:10], setting_name, info_out, mgf_filtered_dir, mgf_out) - # for mzml_file in [x for x in os.listdir(mzml_dir) if '.mzML' in x]: - # exp = read_mzml(mzml_dir + mzml_file) - # exp_id = mzml_file[:10] - # - # mps_df = ms1_peaks(exp) - # np.savetxt(info_out + '/%s_%s.csv' % (setting_name, mzml_file), mps_df, delimiter=',') - # corresponding_mgf = [x for x in os.listdir(mgf_filtered_dir) if exp_id in x][0] - # add_relaxation_mgf(mgf=mgf_filtered_dir + corresponding_mgf, mps=mps_df, - # outfile=mgf_out + '/%s_' % setting_name + corresponding_mgf) - - for mgf_file in os.listdir(mgf_filtered_dir): - add_relaxation_mgf(mgf=mgf_filtered_dir + mgf_file, mps=[], create_comparison=True, - outfile='D:/user/Swantje/projects/pipeline_prepro_xi_fdr/lars_PC_4frag_BS3_Lumos/All_prepro_peakfiles/' + setting_name + '_' + mgf_file) +import os +import pyopenms as oms +import numpy as np +from ProteoFileReader import MGF_Reader +import mass_trace +from joblib import Parallel, delayed +import re + + +def add_relaxation_mgf(mgf, mps, outfile, create_comparison=False): + # mgf_file, outdir, differences = argls[0], argls[1], argls[2] + mass_diff = 1.00335483 + # if '.mgf' in args['file']: + filename = os.path.split(mgf)[1] + # read mgf + spectra = MGF_Reader() + spectra.load(mgf) + out_writer = open(outfile, "w") + for spectrum in spectra: + # calculate mass (neglect proton bec. later on difference used) + regex_match = re.search('(scan=)[0-9]*', spectrum.getTitle()) + if regex_match is not None: + scan = int(regex_match.group(0).split('scan=')[1]) + else: + scan = int(spectrum.getTitle().split('.')[-2]) + # scan = int(spectrum.getTitle().split('.')[-2]) + # try: + differences = [0, -1, -2, -3, -4] + if not create_comparison: + row = mps[mps[:, 0] == scan, 1:] + if len(row) == 1: + differences = [-i for i in range(len(row[0])) if row[0][i] == 1] + elif len(row) > 1: + raise ValueError('multiple matches to scan %s' % scan) + else: + print 'scan %s not found' % scan + + mass = spectrum.getPrecursorMZ() * spectrum.charge # ToDo: correct? talk to SL -protonMass missing! 
+ spectra_add_mip = [str((mass + x * mass_diff) / spectrum.charge) for x in differences if x != 0] + if 0 in differences: + prec_mz = spectrum.getPrecursorMZ() + else: + prec_mz = spectra_add_mip[0] + spectra_add_mip = spectra_add_mip[1:] + # except KeyError: + # differences = [-2, -1, 0] + + stavrox_mgf = """ +MASS=Monoisotopic +BEGIN IONS +TITLE={} +PEPMASS={} {} +CHARGE={}+ +RTINSECONDS={} +ADDITIONALMZ={} +{} +END IONS """.format(spectrum.getTitle(), prec_mz, + spectrum.getPrecursorIntensity() if spectrum.getPrecursorIntensity() > 0 else 0, + int(spectrum.charge), spectrum.getRT(), + ';'.join(spectra_add_mip), + "\r".join(["%s %s" % (i[0], i[1]) for i in spectrum.peaks])) + out_writer.write(stavrox_mgf) + + +def read_mzml(infile): + # init variables + mzml_file = oms.MzMLFile() + exp = oms.MSExperiment() + + # load spectra into exp + mzml_file.load(infile, exp) + return (exp) + + +def get_error(mz1, mz2, charge=None, ppm=True): + if ppm: + return abs(mz1 - mz2) / mz2 * 1e6 + else: + if charge is not None: + return (mz1 - mz2) * charge + + +def return_mps_range(intensity, mps_max): + if False: #intensity >= 10e6: + return [-1] + else: + return list(range(-1, mps_max - 1, -1)) + + +def ms1_peaks(exp, tolerance=6, mps_range=[-1, -2, -3, -4]): # mps_range=[-1, -2, -3, -4] + # loop through spectra and make count if precursor in MS2 spectrum + peaks_found = [] + n_ms2 = 0 + nspectra = exp.size() + for i, spectrum in enumerate(exp): + + if spectrum.getMSLevel() == 1: + continue + n_ms2 += 1 + if i % 10000 == 0: + print "{}/{} done..".format(i, nspectra) + + # if iidone not in matched_scans: + # continue + precursor = spectrum.getPrecursors()[0] + prec_mz, prec_charge = precursor.getMZ(), precursor.getCharge() + MS1scan = mass_trace.find_parent_MS1_scan(exp, i) + ppm_pseudo = 10 # taken from Svens script, apparently not used in function + mz_trace, scans_trace = mass_trace.extract_mass_trace(exp, MS1scan, prec_mz, prec_charge, ppm_pseudo, tolerance, 10) # test with 20 to see difference + if len(scans_trace) == 1: + tmp_ms1 = exp[MS1scan] # TODO find out why mass_trace does not find anything + else: + # try: + best_isotope_seed = np.argmax(mz_trace[:, 1]) + # except: + # pass + best_seed_spectrum = scans_trace[best_isotope_seed] + + # ms1_prev = 0 + # for j in range(i, 0, -1): + tmp_ms1 = exp[best_seed_spectrum] + + res = tmp_ms1[tmp_ms1.findNearest(prec_mz)] + if abs(res.getMZ() - prec_mz) / prec_mz <= tolerance: + prec_int = res.getIntensity() + # mps_range = return_mps_range(prec_int, mps_max) + # if prec_int >= 3e6: + # peaks_found.append([i + 1, True] + [False] * len(mps_range)) + # continue + # else: + # peaks_found.append([i + 1, True] + [True] * len(mps_range)) + # continue + theo_mip = np.array([prec_mz + (mip_i * 1.00335483) / prec_charge for mip_i in mps_range]) + mip_nearest = [tmp_ms1.findNearest(x) for x in theo_mip] + error = np.array([get_error(tmp_ms1[mea].getMZ(), expi, ppm=True) for expi, mea in + zip(theo_mip, mip_nearest)]) + range_found = [True if x <= tolerance else False for x in error] + if sum(range_found) == 0: + # TODO: try if sensible to not mps search these + if len(mps_range) > 2: + peaks_found.append( + # [i + 1, True] + [True] * len(mps_range) + # [i + 1, True, True, True] + [False] * (len(mps_range) - 2) + # [i + 1, True, True, True, True, False] + [i + 1, True] + [True] * (len(mps_range) - 1) + [False] + ) + else: + peaks_found.append( + [i + 1, True] + [True] * len(mps_range) + ) + continue + else: + # check for continous peaks except -1 peak + # TODO: allow 
gap? + found = False + lightest_peak = 1 + for i_mip in range(len(range_found), 1, -1): + if range_found[i_mip - 1] & (i_mip > lightest_peak): + lightest_peak = i_mip + if sum(range_found[:i_mip]) == len(range_found[:i_mip]): + # if i_mip == len(range_found): + # sel = [False] * (i_mip - 2) + range_found[i_mip - 2:] + # else: + # sel = [False] * (i_mip - 2) + [True] * 3 + [False] * (len(range_found) - 1 - i_mip) # 2 + # takes lightest 2 continous + existing lighter peaks, excludes heaviar + sel = [False] * (i_mip - 2) + range_found[i_mip - 2:] # 2 + peaks_found.append([i + 1, False] + sel) + found = True + break + # if no continous found take all + if not found: + if not lightest_peak == len(range_found): + peaks_found.append( + # [i + 1, True] + [True] * len(mps_range) + # [i + 1, True] + [True] * (lightest_peak + 1) + [False] * (len(range_found) - lightest_peak - 1) + [i + 1, True] + [True] * (lightest_peak) + [False] * (len(range_found) - lightest_peak) + ) + else: + peaks_found.append([i + 1, True] + [True] * len(mps_range)) + continue + + else: + print 'Precursor not found' + continue + + return np.array(peaks_found) + + +def main(mzmlfile, exp_id, setting, infoout_dir, mgf_in_dir, mgf_out_dir): + exp = read_mzml(mzmlfile) + # exp_id = mzml_file[:10] + + mps_df = ms1_peaks(exp) + np.savetxt(infoout_dir + '/%s_%s.csv' % (setting, exp_id), mps_df, delimiter=',') + corresponding_mgf = [x for x in os.listdir(mgf_in_dir) if exp_id in x][0] + add_relaxation_mgf(mgf=mgf_in_dir + corresponding_mgf, mps=mps_df, + outfile=mgf_out_dir + '/%s_' % setting + corresponding_mgf) + +if __name__ == '__main__': + isotope_diff = 1.00335483 + # mzml_dir = 'D:/user/Swantje/data/PC/mzML/' + # chaet_dir = 'fr7-10' + mzml_dir = 'D:/user/Swantje/dsso/myco_prepro/1c_remaining_frac/test_set/mzml/' + mgf_filtered_dir = '//130.149.167.198/rappsilbergroup/users/lswantje/dsso_opt/searches/mycoplasma_opt/data/filtered/' + # mgf_filtered_dir = 'D:/user/Swantje/projects/pipeline_prepro_xi_fdr/chaetomium/%s/All_prepro_peakfiles/mscon_PF_20/' % chaet_dir + setting_name = 'mpsreductionfilt' + mgf_out = '//130.149.167.198/rappsilbergroup/users/lswantje/dsso_opt/searches/mycoplasma_opt/data/' + setting_name + info_out = '//130.149.167.198/rappsilbergroup/users/lswantje/dsso_opt/searches/mycoplasma_opt/' + setting_name + # mgf_out = 'D:/user/Swantje/projects/pipeline_prepro_xi_fdr/chaetomium/%s/All_prepro_peakfiles/' % chaet_dir + setting_name + # info_out = 'D:/user/Swantje/projects/pipeline_prepro_xi_fdr/chaetomium/%s/relaxation_tbls/' % chaet_dir + setting_name + + if not os.path.exists(mgf_out): + os.makedirs(mgf_out) + if not os.path.exists(info_out): + os.makedirs(info_out) + + mzmls_in = [x for x in os.listdir(mzml_dir) if '.mzML' in x] + # Parallel(n_jobs=4)(delayed(main)(mzml_dir + x, x[:10], setting_name, info_out, mgf_filtered_dir, + # mgf_out) for x in mzmls_in) + for x in mzmls_in: + main(mzml_dir + x, x[:10], setting_name, info_out, mgf_filtered_dir, mgf_out) + for mzml_file in [x for x in os.listdir(mzml_dir) if '.mzML' in x]: + exp = read_mzml(mzml_dir + mzml_file) + exp_id = mzml_file[:10] + + mps_df = ms1_peaks(exp) + np.savetxt(info_out + '/%s_%s.csv' % (setting_name, mzml_file), mps_df, delimiter=',') + corresponding_mgf = [x for x in os.listdir(mgf_filtered_dir) if exp_id in x][0] + add_relaxation_mgf(mgf=mgf_filtered_dir + corresponding_mgf, mps=mps_df, + outfile=mgf_out + '/%s_' % setting_name + corresponding_mgf) + + # for mgf_file in os.listdir(mgf_filtered_dir): + # 
add_relaxation_mgf(mgf=mgf_filtered_dir + mgf_file, + # outfile='D:/user/Swantje/projects/pipeline_prepro_xi_fdr/lars_PC_4frag_BS3_Lumos/All_prepro_peakfiles/' + setting_name + '_' + mgf_file) diff --git a/preprocessing.py b/preprocessing.py index 6ec08f2..5e82ddd 100755 --- a/preprocessing.py +++ b/preprocessing.py @@ -1,5 +1,4 @@ import os -import numpy as np import subprocess from multiprocessing import Pool import sys @@ -8,22 +7,26 @@ from pyteomics import mzml from functools import partial import ProteoFileReader +import mass_recal_ms2 import mass_recal -import zipfile def read_cmdline(): try: - opts, args = getopt.getopt(sys.argv[1:], '', ['input=', 'config=', 'outpath=', 'db=', 'xiconf=', 'shiftcsv=', 'skip_recal=']) + opts, args = getopt.getopt(sys.argv[1:], '', ['input=', 'config=', 'outpath=', 'db=', 'xiconf=', 'shiftcsv=', + 'skip_recal=', 'skip_ms2_recal=']) except getopt.GetoptError: print('preprocessing.py --input ' '--outpath ' '--config ' '--db ' '--xiconf ', - '--shiftcsv --skip_recal ') + '--shiftcsv ', + '--skip_recal ', + '--skip_ms2_recal ') sys.exit() recal = True + ms2recal = True recal_conf = {} for opt, arg in opts: if opt == '--input': @@ -40,6 +43,8 @@ def read_cmdline(): recal_conf['shift_csv'] = arg elif opt == '--skip_recal': recal = False + elif opt == '--skip_ms2_recal': + ms2recal = False if 'input_arg' not in locals() or 'config' not in locals(): print('preprocessing.py --input ' @@ -66,32 +71,27 @@ def read_cmdline(): '--shiftcsv --skip_recal ') sys.exit() - return input_arg, outdir, config, recal_conf, recal + return input_arg, outdir, config, recal_conf, recal, ms2recal -def split_mzml(mzml_file, detector="all"): +def mzml_to_MS2_spectra(mzml_file, detector_filter="all"): """ - function to split a mzML file into dict of MS2_Spectra objects (can be written to mgf format) - by fragmentation method + function to split a mzML file into a list of MS2_Spectra objects (can be written to mgf format) + with fragmentation method and detector type Parameters: ----------------------------------------- mzml_file: str, path to mzML file + detector_filter: filter scans by detector type ('all', 'FT', 'IT') - Return: dict {fragMethod: list(MS2_spectrum) + Return: list(MS2_spectrum) """ mzml_reader = mzml.read(mzml_file) - ordered_ms2_spectra = { - "CID": [], - "HCD": [], - "ETD": [], - "ETciD": [], - "EThcD": [], - "unknown": [] - } + sorted_ms2_spectra = [] + unknown_frag_method_count = 0 n = 0 for spectrum in mzml_reader: @@ -102,10 +102,10 @@ def split_mzml(mzml_file, detector="all"): detector_str = re.search("^(FT|IT)", filter_str).groups()[0] frag_groups = re.findall("@([A-z]+)([0-9.]+)", filter_str) except AttributeError: - raise StandardError("filter string parse error: %s" % filter_str) + raise Exception("filter string parse error: %s" % filter_str) - if not detector == "all": - if not detector == detector_str: + if not detector_filter == "all": + if not detector_filter == detector_str: continue title = os.path.split(mzml_file)[1].split('.mzML')[0] + " " + spectrum['id'] @@ -119,27 +119,40 @@ def split_mzml(mzml_file, detector="all"): pre_z = precursor['charge state'] peaks = zip(spectrum['m/z array'], spectrum['intensity array']) - ms2class_spectrum = ProteoFileReader.MS2_spectrum(title, rt, pre_mz, pre_int, pre_z, peaks) - frag_methods = [f[0] for f in frag_groups] if "etd" in frag_methods: if "cid" in frag_methods: - ordered_ms2_spectra['ETciD'].append(ms2class_spectrum) + frag_method = "ETciD" elif "hcd" in frag_methods: - 
ordered_ms2_spectra['EThcD'].append(ms2class_spectrum) + frag_method = "EThcD" else: - ordered_ms2_spectra['ETD'].append(ms2class_spectrum) + frag_method = "ETD" elif "cid" in frag_methods: - ordered_ms2_spectra['CID'].append(ms2class_spectrum) + frag_method = "CID" elif "hcd" in frag_methods: - ordered_ms2_spectra['HCD'].append(ms2class_spectrum) + frag_method = "HCD" else: - ordered_ms2_spectra['unknown'].append(ms2class_spectrum) - if len(ordered_ms2_spectra['unknown']) > 0: - raise Warning("The fragmentation method of %i spectra could not be identified" % len(ordered_ms2_spectra['unknown'])) + frag_method = 'unknown' + unknown_frag_method_count += 1 + + ms2class_spectrum = ProteoFileReader.MS2_spectrum( + title, + rt, + pre_mz, + pre_int, + pre_z, + peaks, + detector=detector_str, + fragmethod=frag_method + ) + + sorted_ms2_spectra.append(ms2class_spectrum) - return {k: v for k, v in ordered_ms2_spectra.items() if len(v) > 0} + if unknown_frag_method_count > 0: + raise Warning("The fragmentation method of %i spectra could not be identified" % unknown_frag_method_count) + + return sorted_ms2_spectra def generate_cihcd_spectra(mzml_file): @@ -160,11 +173,11 @@ def generate_cihcd_spectra(mzml_file): frag_groups = re.findall("@([A-z]+)([0-9.]+)", filter_str) precursor_mz_groups = re.findall("([0-9.]+)@", filter_str) except AttributeError: - raise StandardError("filter string parse error: %s" % filter_str) + raise Exception("filter string parse error: %s" % filter_str) try: ms2_id = spectrum['precursorList']['precursor'][0]['spectrumRef'] except KeyError: - ms2_id = '' # TODO why Key ERror + ms2_id = '' # TODO why Key Error title = os.path.split(mzml_file)[1].split('.mzML')[0] + " " + spectrum['id'] + " ms2_scanId=" + ms2_id rt = spectrum['scanList']['scan'][0]['scan start time'] * 60 @@ -199,55 +212,7 @@ def mscon_cmd(filepath, outdir, settings, mgf): return cmd_list -def write_mgf(spectra, outfile): - out_writer = open(os.path.join(outfile), "w") - for spectrum in spectra: - scan = re.search('scan=[0-9]*', spectrum.getTitle()).group(0)[5:] - # title = spectrum.getTitle() - try: - title = re.match('(B|E)[0-9]{6}_[0-9]{2}.+?( )', spectrum.getTitle()).group(0)[:-1] - except AttributeError: - title = re.match('[0-9]{8}_[0-9]{2}.+?( )', spectrum.getTitle()).group(0)[:-1] - title = '.'.join([title, scan, scan, str(int(spectrum.charge))]) - if 'ms2_scanId' in spectrum.getTitle(): - try: - ms2_parent = re.search('ms2_scanId=.*scan=([0-9]+)', spectrum.getTitle()).groups()[0] - except AttributeError: - ms2_parent = 0 - title += ' ms2_scanId=%s' % ms2_parent - if sys.version_info.major < 3: - stavrox_mgf = """ -MASS=Monoisotopic -BEGIN IONS -TITLE={} -PEPMASS={} {} -CHARGE={}+ -RTINSECONDS={} -{} -END IONS """.format(title, - spectrum.getPrecursorMass(), - spectrum.getPrecursorIntensity() if spectrum.getPrecursorIntensity() > 0 else 0, - int(spectrum.charge), spectrum.getRT(), - "\n".join(["%s %s" % (i[0], i[1]) for i in spectrum.peaks if i[1] > 0])) - else: - stavrox_mgf = """ -MASS=Monoisotopic -BEGIN IONS -TITLE={} -PEPMASS={} {} -CHARGE={}+ -RTINSECONDS={} -{} -END IONS """.format(title, - spectrum.getPrecursorMass(), - spectrum.getPrecursorIntensity() if spectrum.getPrecursorIntensity() > 0 else 0, - int(spectrum.charge), spectrum.getRT(), - "\n".join(["%s %s" % (mz, spectrum.peaks[1][i]) for i, mz in enumerate(spectrum.peaks[0]) if - spectrum.peaks[1][i] > 0])) - out_writer.write(stavrox_mgf) - - -def process_file(filepath, outdir, mscon_settings, split_acq, detector_filter, mscon_exe, 
cihcd_ms3=True): #TODO implement option further up +def process_file(filepath, outdir, mscon_settings, split_acq, detector_filter, mscon_exe, cihcd_ms3=False): #TODO implement option further up if not os.path.exists(outdir): os.makedirs(outdir) @@ -262,19 +227,20 @@ def process_file(filepath, outdir, mscon_settings, split_acq, detector_filter, m if cihcd_ms3: cihcd_spectra = generate_cihcd_spectra(mzml_file) - write_mgf(spectra=cihcd_spectra, outfile=os.path.join(outdir, 'CIhcD_ms3_' + filename[:filename.rfind('.')] + '.mgf')) + ProteoFileReader.write_mgf(spectra=cihcd_spectra, outfile=os.path.join(outdir, 'CIhcD_ms3_' + filename[:filename.rfind('.')] + '.mgf')) if split_acq: - splitted_spectra = split_mzml(mzml_file, detector_filter) + split_spectra = mzml_to_MS2_spectra(mzml_file, detector_filter) - for acq in splitted_spectra: - write_mgf(spectra=splitted_spectra[acq], - outfile=os.path.join(outdir, acq + '_' + filename[:filename.rfind('.')]+'.mgf')) + ProteoFileReader.write_mgf( + spectra=split_spectra, + outfile=os.path.join(outdir, filename[:filename.rfind('.')]+'.mgf') + ) if __name__ == '__main__': - # read cmdline arguments / get deafult values - input_arg, outdir, config_path, recal_conf, recal = read_cmdline() + # read cmdline arguments / get default values + input_arg, outdir, config_path, recal_conf, recal, ms2recal = read_cmdline() try: execfile(config_path) except NameError: @@ -300,24 +266,29 @@ def process_file(filepath, outdir, mscon_settings, split_acq, detector_filter, m pool.close() pool.join() - recal_in = [os.path.join(outdir, x) for x in os.listdir(outdir) if '.mgf' in x] + mgf_file_list = [os.path.join(outdir, x) for x in os.listdir(outdir) if '.mgf' in x] if recal: - # pool = Pool(processes=nthr) + if not os.path.exists(outdir): os.makedirs(outdir) - output = zipfile.ZipFile(outdir + '/recalibrated_files.zip', 'w', zipfile.ZIP_DEFLATED) + # TODO change to parallel with manual input of error - for inputfile in recal_in: + for inputfile in mgf_file_list: if 'ms3' in os.path.split(inputfile)[1]: continue - mass_recal.main(fasta=recal_conf['db'], xi_cnf=recal_conf['xiconf'], outpath=outdir, - mgf=inputfile, threads=str(nthr), - val_input=recal_conf['shift_csv'] #'D:/user/Swantje/dsso_ot_it_error/raw/processed_together/ms1_err.csv' - ) - # val_input='//130.149.167.198/rappsilbergroup/users/lswantje/DSSO_prepro/xlinkx/processed_wosplit/ms1_err.csv' - output.write(os.path.join(outdir, 'recal_' + os.path.split(inputfile)[1]), - arcname='recal_' + os.path.split(inputfile)[1]) - # pool.map(partial(mass_recal.main, fasta=database, xi_cnf=xi_recal_config, outpath=outdir + '/recal', - # xi_jar=xi_offline), recal_in) - # pool.close() - # pool.join() + if ms2recal: + mass_recal_ms2.main(fasta=recal_conf['db'], xi_cnf=recal_conf['xiconf'], outpath=outdir, + mgf=inputfile, threads=str(nthr), + val_input=recal_conf['shift_csv'] + ) + else: + mass_recal.main(fasta=recal_conf['db'], xi_cnf=recal_conf['xiconf'], outpath=outdir, + mgf=inputfile, threads=str(nthr), + val_input=recal_conf['shift_csv'] + ) + + mgf_file_list = [os.path.join(os.path.split(x)[0], 'recal_' + os.path.split(x)[1]) for x in mgf_file_list] + + if split_acq: + for mgf_file in mgf_file_list: + ProteoFileReader.split_mgf_methods(mgf_file) diff --git a/requirements.txt b/requirements.txt deleted file mode 100644 index 8dc241c..0000000 --- a/requirements.txt +++ /dev/null @@ -1,16 +0,0 @@ -backports.functools-lru-cache==1.5 -cycler==0.10.0 -kiwisolver==1.0.1 -lxml==4.2.3 -matplotlib==2.2.2 -numpy==1.14.5 
-pandas==0.23.3
-pyopenms==2.3.0.4
-pyparsing==2.2.0
-pyteomics==3.5.1
-python-dateutil==2.7.3
-pytz==2018.5
-scipy==1.1.0
-seaborn==0.9.0
-six==1.11.0
-subprocess32==3.5.2
diff --git a/resources/XiSearch_1.6.739.jar b/resources/XiSearch_1.6.739.jar
deleted file mode 100755
index 86ff068..0000000
Binary files a/resources/XiSearch_1.6.739.jar and /dev/null differ
diff --git a/resources/XiSearch_1.6.745.jar b/resources/XiSearch_1.6.745.jar
new file mode 100755
index 0000000..263b0f3
Binary files /dev/null and b/resources/XiSearch_1.6.745.jar differ
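
For reference, a minimal usage sketch of the MGF helpers this patch adds to ProteoFileReader.py (read_mgf, write_mgf, split_mgf_methods). It is illustrative only and not part of the patch: the file paths and the 3.2 ppm value are placeholder assumptions, and the precursor correction simply mirrors adjust_prec_mz() in mass_recal.py.

# Sketch only, assuming the patched ProteoFileReader module is importable;
# the paths and the ppm shift below are hypothetical example values.
import ProteoFileReader

# parse an MGF into MS2_spectrum objects (precursor m/z, RT, charge, peaks,
# plus the DETECTOR=/FRAGMETHOD= headers that write_mgf emits)
spectra = ProteoFileReader.read_mgf("example_run.mgf")

ppm_error = 3.2  # e.g. the median MS1 error reported by the linear Xi search
for spectrum in spectra:
    # same precursor m/z correction as adjust_prec_mz() in mass_recal.py
    spectrum.pepmz = spectrum.getPrecursorMZ() / (1 - ppm_error / 10.0 ** 6)

ProteoFileReader.write_mgf(spectra, "recal_example_run.mgf")

# split the recalibrated file into one MGF per fragmentation method
# (CID_/HCD_/ETD_/ETciD_/EThcD_ file prefixes); methods without spectra are skipped
ProteoFileReader.split_mgf_methods("recal_example_run.mgf")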