diff --git a/Pipfile b/Pipfile new file mode 100755 index 0000000..595a01f --- /dev/null +++ b/Pipfile @@ -0,0 +1,27 @@ +[[source]] +name = "pypi" +url = "https://pypi.org/simple" +verify_ssl = true + +[dev-packages] + +[packages] +kiwisolver = ">=1.0.1" +lxml = ">=4.2.3" +matplotlib = ">=2.2.2" +numpy = ">=1.14.5" +pandas = ">=0.23.3" +pyopenms = ">=2.3.0.4" +pyparsing = ">=2.2.0" +pyteomics = ">=3.5.1" +python-dateutil = ">=2.7.3" +pytz = ">=2018.5" +scipy = ">=1.1.0" +seaborn = ">=0.9.0" +six = ">=1.11.0" +subprocess32 = ">=3.5.2" +"backports.functools_lru_cache" = ">=1.5" +Cycler = ">=0.10.0" + +[requires] +python_version = "3.7" diff --git a/Pipfile.lock b/Pipfile.lock new file mode 100755 index 0000000..2058dcb --- /dev/null +++ b/Pipfile.lock @@ -0,0 +1,267 @@ +{ + "_meta": { + "hash": { + "sha256": "4c7eaca64c1e2d03c4e81329915936b0f9a7d38392910fe4cfcd4ba2bba3e59a" + }, + "pipfile-spec": 6, + "requires": { + "python_version": "3.7" + }, + "sources": [ + { + "name": "pypi", + "url": "https://pypi.org/simple", + "verify_ssl": true + } + ] + }, + "default": { + "backports.functools-lru-cache": { + "hashes": [ + "sha256:9d98697f088eb1b0fa451391f91afb5e3ebde16bbdb272819fd091151fda4f1a", + "sha256:f0b0e4eba956de51238e17573b7087e852dfe9854afd2e9c873f73fc0ca0a6dd" + ], + "index": "pypi", + "version": "==1.5" + }, + "cycler": { + "hashes": [ + "sha256:1d8a5ae1ff6c5cf9b93e8811e581232ad8920aeec647c37316ceac982b08cb2d", + "sha256:cd7b2d1018258d7247a71425e9f26463dfb444d411c39569972f4ce586b0c9d8" + ], + "index": "pypi", + "version": "==0.10.0" + }, + "kiwisolver": { + "hashes": [ + "sha256:05b5b061e09f60f56244adc885c4a7867da25ca387376b02c1efc29cc16bcd0f", + "sha256:26f4fbd6f5e1dabff70a9ba0d2c4bd30761086454aa30dddc5b52764ee4852b7", + "sha256:3b2378ad387f49cbb328205bda569b9f87288d6bc1bf4cd683c34523a2341efe", + "sha256:400599c0fe58d21522cae0e8b22318e09d9729451b17ee61ba8e1e7c0346565c", + "sha256:47b8cb81a7d18dbaf4fed6a61c3cecdb5adec7b4ac292bddb0d016d57e8507d5", + "sha256:53eaed412477c836e1b9522c19858a8557d6e595077830146182225613b11a75", + "sha256:58e626e1f7dfbb620d08d457325a4cdac65d1809680009f46bf41eaf74ad0187", + "sha256:5a52e1b006bfa5be04fe4debbcdd2688432a9af4b207a3f429c74ad625022641", + "sha256:5c7ca4e449ac9f99b3b9d4693debb1d6d237d1542dd6a56b3305fe8a9620f883", + "sha256:682e54f0ce8f45981878756d7203fd01e188cc6c8b2c5e2cf03675390b4534d5", + "sha256:79bfb2f0bd7cbf9ea256612c9523367e5ec51d7cd616ae20ca2c90f575d839a2", + "sha256:7f4dd50874177d2bb060d74769210f3bce1af87a8c7cf5b37d032ebf94f0aca3", + "sha256:8944a16020c07b682df861207b7e0efcd2f46c7488619cb55f65882279119389", + "sha256:8aa7009437640beb2768bfd06da049bad0df85f47ff18426261acecd1cf00897", + "sha256:939f36f21a8c571686eb491acfffa9c7f1ac345087281b412d63ea39ca14ec4a", + "sha256:9733b7f64bd9f807832d673355f79703f81f0b3e52bfce420fc00d8cb28c6a6c", + "sha256:a02f6c3e229d0b7220bd74600e9351e18bc0c361b05f29adae0d10599ae0e326", + "sha256:a0c0a9f06872330d0dd31b45607197caab3c22777600e88031bfe66799e70bb0", + "sha256:acc4df99308111585121db217681f1ce0eecb48d3a828a2f9bbf9773f4937e9e", + "sha256:b64916959e4ae0ac78af7c3e8cef4becee0c0e9694ad477b4c6b3a536de6a544", + "sha256:d3fcf0819dc3fea58be1fd1ca390851bdb719a549850e708ed858503ff25d995", + "sha256:d52e3b1868a4e8fd18b5cb15055c76820df514e26aa84cc02f593d99fef6707f", + "sha256:db1a5d3cc4ae943d674718d6c47d2d82488ddd94b93b9e12d24aabdbfe48caee", + "sha256:e3a21a720791712ed721c7b95d433e036134de6f18c77dbe96119eaf7aa08004", + "sha256:e8bf074363ce2babeb4764d94f8e65efd22e6a7c74860a4f05a6947afc020ff2", + 
"sha256:f16814a4a96dc04bf1da7d53ee8d5b1d6decfc1a92a63349bb15d37b6a263dd9", + "sha256:f2b22153870ca5cf2ab9c940d7bc38e8e9089fa0f7e5856ea195e1cf4ff43d5a", + "sha256:f790f8b3dff3d53453de6a7b7ddd173d2e020fb160baff578d578065b108a05f" + ], + "index": "pypi", + "version": "==1.1.0" + }, + "lxml": { + "hashes": [ + "sha256:06c7616601430aa140a69f97e3116308fffe0848f543b639a5ec2e8920ae72fd", + "sha256:177202792f9842374a8077735c69c41a4282183f7851443d2beb8ee310720819", + "sha256:19317ad721ceb9e39847d11131903931e2794e447d4751ebb0d9236f1b349ff2", + "sha256:36d206e62f3e5dbaafd4ec692b67157e271f5da7fd925fda8515da675eace50d", + "sha256:387115b066c797c85f9861a9613abf50046a15aac16759bc92d04f94acfad082", + "sha256:3ce1c49d4b4a7bc75fb12acb3a6247bb7a91fe420542e6d671ba9187d12a12c2", + "sha256:4d2a5a7d6b0dbb8c37dab66a8ce09a8761409c044017721c21718659fa3365a1", + "sha256:58d0a1b33364d1253a88d18df6c0b2676a1746d27c969dc9e32d143a3701dda5", + "sha256:62a651c618b846b88fdcae0533ec23f185bb322d6c1845733f3123e8980c1d1b", + "sha256:69ff21064e7debc9b1b1e2eee8c2d686d042d4257186d70b338206a80c5bc5ea", + "sha256:7060453eba9ba59d821625c6af6a266bd68277dce6577f754d1eb9116c094266", + "sha256:7d26b36a9c4bce53b9cfe42e67849ae3c5c23558bc08363e53ffd6d94f4ff4d2", + "sha256:83b427ad2bfa0b9705e02a83d8d607d2c2f01889eb138168e462a3a052c42368", + "sha256:923d03c84534078386cf50193057aae98fa94cace8ea7580b74754493fda73ad", + "sha256:b773715609649a1a180025213f67ffdeb5a4878c784293ada300ee95a1f3257b", + "sha256:baff149c174e9108d4a2fee192c496711be85534eab63adb122f93e70aa35431", + "sha256:bca9d118b1014b4c2d19319b10a3ebed508ff649396ce1855e1c96528d9b2fa9", + "sha256:ce580c28845581535dc6000fc7c35fdadf8bea7ccb57d6321b044508e9ba0685", + "sha256:d34923a569e70224d88e6682490e24c842907ba2c948c5fd26185413cbe0cd96", + "sha256:dd9f0e531a049d8b35ec5e6c68a37f1ba6ec3a591415e6804cbdf652793d15d7", + "sha256:ecb805cbfe9102f3fd3d2ef16dfe5ae9e2d7a7dfbba92f4ff1e16ac9784dbfb0", + "sha256:ede9aad2197a0202caff35d417b671f5f91a3631477441076082a17c94edd846", + "sha256:ef2d1fc370400e0aa755aab0b20cf4f1d0e934e7fd5244f3dd4869078e4942b9", + "sha256:f2fec194a49bfaef42a548ee657362af5c7a640da757f6f452a35da7dd9f923c" + ], + "index": "pypi", + "version": "==4.3.4" + }, + "matplotlib": { + "hashes": [ + "sha256:1febd22afe1489b13c6749ea059d392c03261b2950d1d45c17e3aed812080c93", + "sha256:31a30d03f39528c79f3a592857be62a08595dec4ac034978ecd0f814fa0eec2d", + "sha256:4442ce720907f67a79d45de9ada47be81ce17e6c2f448b3c64765af93f6829c9", + "sha256:796edbd1182cbffa7e1e7a97f1e141f875a8501ba8dd834269ae3cd45a8c976f", + "sha256:934e6243df7165aad097572abf5b6003c77c9b6c480c3c4de6f2ef1b5fdd4ec0", + "sha256:bab9d848dbf1517bc58d1f486772e99919b19efef5dd8596d4b26f9f5ee08b6b", + "sha256:c1fe1e6cdaa53f11f088b7470c2056c0df7d80ee4858dadf6cbe433fcba4323b", + "sha256:e5b8aeca9276a3a988caebe9f08366ed519fff98f77c6df5b64d7603d0e42e36", + "sha256:ec6bd0a6a58df3628ff269978f4a4b924a0d371ad8ce1f8e2b635b99e482877a" + ], + "index": "pypi", + "version": "==3.1.1" + }, + "numpy": { + "hashes": [ + "sha256:0778076e764e146d3078b17c24c4d89e0ecd4ac5401beff8e1c87879043a0633", + "sha256:141c7102f20abe6cf0d54c4ced8d565b86df4d3077ba2343b61a6db996cefec7", + "sha256:14270a1ee8917d11e7753fb54fc7ffd1934f4d529235beec0b275e2ccf00333b", + "sha256:27e11c7a8ec9d5838bc59f809bfa86efc8a4fd02e58960fa9c49d998e14332d5", + "sha256:2a04dda79606f3d2f760384c38ccd3d5b9bb79d4c8126b67aff5eb09a253763e", + "sha256:3c26010c1b51e1224a3ca6b8df807de6e95128b0908c7e34f190e7775455b0ca", + "sha256:52c40f1a4262c896420c6ea1c6fda62cf67070e3947e3307f5562bd783a90336", + 
"sha256:6e4f8d9e8aa79321657079b9ac03f3cf3fd067bf31c1cca4f56d49543f4356a5", + "sha256:7242be12a58fec245ee9734e625964b97cf7e3f2f7d016603f9e56660ce479c7", + "sha256:7dc253b542bfd4b4eb88d9dbae4ca079e7bf2e2afd819ee18891a43db66c60c7", + "sha256:94f5bd885f67bbb25c82d80184abbf7ce4f6c3c3a41fbaa4182f034bba803e69", + "sha256:a89e188daa119ffa0d03ce5123dee3f8ffd5115c896c2a9d4f0dbb3d8b95bfa3", + "sha256:ad3399da9b0ca36e2f24de72f67ab2854a62e623274607e37e0ce5f5d5fa9166", + "sha256:b0348be89275fd1d4c44ffa39530c41a21062f52299b1e3ee7d1c61f060044b8", + "sha256:b5554368e4ede1856121b0dfa35ce71768102e4aa55e526cb8de7f374ff78722", + "sha256:cbddc56b2502d3f87fda4f98d948eb5b11f36ff3902e17cb6cc44727f2200525", + "sha256:d79f18f41751725c56eceab2a886f021d70fd70a6188fd386e29a045945ffc10", + "sha256:dc2ca26a19ab32dc475dbad9dfe723d3a64c835f4c23f625c2b6566ca32b9f29", + "sha256:dd9bcd4f294eb0633bb33d1a74febdd2b9018b8b8ed325f861fffcd2c7660bb8", + "sha256:e8baab1bc7c9152715844f1faca6744f2416929de10d7639ed49555a85549f52", + "sha256:ec31fe12668af687b99acf1567399632a7c47b0e17cfb9ae47c098644ef36797", + "sha256:f12b4f7e2d8f9da3141564e6737d79016fe5336cc92de6814eba579744f65b0a", + "sha256:f58ac38d5ca045a377b3b377c84df8175ab992c970a53332fa8ac2373df44ff7" + ], + "index": "pypi", + "version": "==1.16.4" + }, + "pandas": { + "hashes": [ + "sha256:071e42b89b57baa17031af8c6b6bbd2e9a5c68c595bc6bf9adabd7a9ed125d3b", + "sha256:17450e25ae69e2e6b303817bdf26b2cd57f69595d8550a77c308be0cd0fd58fa", + "sha256:17916d818592c9ec891cbef2e90f98cc85e0f1e89ed0924c9b5220dc3209c846", + "sha256:2538f099ab0e9f9c9d09bbcd94b47fd889bad06dc7ae96b1ed583f1dc1a7a822", + "sha256:366f30710172cb45a6b4f43b66c220653b1ea50303fbbd94e50571637ffb9167", + "sha256:42e5ad741a0d09232efbc7fc648226ed93306551772fc8aecc6dce9f0e676794", + "sha256:4e718e7f395ba5bfe8b6f6aaf2ff1c65a09bb77a36af6394621434e7cc813204", + "sha256:4f919f409c433577a501e023943e582c57355d50a724c589e78bc1d551a535a2", + "sha256:4fe0d7e6438212e839fc5010c78b822664f1a824c0d263fd858f44131d9166e2", + "sha256:5149a6db3e74f23dc3f5a216c2c9ae2e12920aa2d4a5b77e44e5b804a5f93248", + "sha256:627594338d6dd995cfc0bacd8e654cd9e1252d2a7c959449228df6740d737eb8", + "sha256:83c702615052f2a0a7fb1dd289726e29ec87a27272d775cb77affe749cca28f8", + "sha256:8c872f7fdf3018b7891e1e3e86c55b190e6c5cee70cab771e8f246c855001296", + "sha256:90f116086063934afd51e61a802a943826d2aac572b2f7d55caaac51c13db5b5", + "sha256:a3352bacac12e1fc646213b998bce586f965c9d431773d9e91db27c7c48a1f7d", + "sha256:bcdd06007cca02d51350f96debe51331dec429ac8f93930a43eb8fb5639e3eb5", + "sha256:c1bd07ebc15285535f61ddd8c0c75d0d6293e80e1ee6d9a8d73f3f36954342d0", + "sha256:c9a4b7c55115eb278c19aa14b34fcf5920c8fe7797a09b7b053ddd6195ea89b3", + "sha256:cc8fc0c7a8d5951dc738f1c1447f71c43734244453616f32b8aa0ef6013a5dfb", + "sha256:d7b460bc316064540ce0c41c1438c416a40746fd8a4fb2999668bf18f3c4acf1" + ], + "index": "pypi", + "version": "==0.24.2" + }, + "pyopenms": { + "hashes": [ + "sha256:611669bddb6653ec05fffbb141363fbe908baf6f5f254067562e8013667ca6b3", + "sha256:6b3c2907f8eb6be4436f41b8b2bc5e7e6bb02bac3a689e0b2cb1032b747cfcf1", + "sha256:814889a889a16fee614e9158e84d7613a32a85de91a5c1aa614b5fa6e5a68353", + "sha256:8b76877c1c4a1d90034996f6500bbc5ece3da6388f206b73b3e11f496df66f0e", + "sha256:913d39a5c0aaf73d34b53e01228543c2cde60c6cb2164edcd65e61751afca773", + "sha256:9348b132b65b1200a2259685a161649e7190dce7ea084f93ea820d46f89bba8f", + "sha256:97cdb38529cc89e346d209fcaf947dc89ac97a972224dfdcd2dc02fd4a82abad", + "sha256:b37bdb071708a86366a29305c5c485b96a8dc5ac0cf2766b86eb5c740e2aaf71", + 
"sha256:c962f86f3e29785d770317554a3eebeb7c612ae0eab67be4ec783336b6d95766", + "sha256:d9d8a0c629551d6c0326ce5a4b7354e7bf9c44f63e765fc18b2db5191d702e4f", + "sha256:f75622c9f92c056faa685311c0e754e4ace6b7745dead9c756de25d8ccfff106", + "sha256:f96e206d58a61ea68419160167c1702ddc9d7beffa6b7a725ea8e3435d007f0e" + ], + "index": "pypi", + "version": "==2.4.0" + }, + "pyparsing": { + "hashes": [ + "sha256:1873c03321fc118f4e9746baf201ff990ceb915f433f23b395f5580d1840cb2a", + "sha256:9b6323ef4ab914af344ba97510e966d64ba91055d6b9afa6b30799340e89cc03" + ], + "index": "pypi", + "version": "==2.4.0" + }, + "pyteomics": { + "hashes": [ + "sha256:3c72ca6e521c0f2183d1c002935e5662b857711425179dc30038665c38fc2a53", + "sha256:8eef7f325bb818fe403e3c0859b36eecd22efbd4012d7610f51479d88ced72a2" + ], + "index": "pypi", + "version": "==4.1.2" + }, + "python-dateutil": { + "hashes": [ + "sha256:7e6584c74aeed623791615e26efd690f29817a27c73085b78e4bad02493df2fb", + "sha256:c89805f6f4d64db21ed966fda138f8a5ed7a4fdbc1a8ee329ce1b74e3c74da9e" + ], + "index": "pypi", + "version": "==2.8.0" + }, + "pytz": { + "hashes": [ + "sha256:303879e36b721603cc54604edcac9d20401bdbe31e1e4fdee5b9f98d5d31dfda", + "sha256:d747dd3d23d77ef44c6a3526e274af6efeb0a6f1afd5a69ba4d5be4098c8e141" + ], + "index": "pypi", + "version": "==2019.1" + }, + "scipy": { + "hashes": [ + "sha256:03b1e0775edbe6a4c64effb05fff2ce1429b76d29d754aa5ee2d848b60033351", + "sha256:09d008237baabf52a5d4f5a6fcf9b3c03408f3f61a69c404472a16861a73917e", + "sha256:10325f0ffac2400b1ec09537b7e403419dcd25d9fee602a44e8a32119af9079e", + "sha256:1db9f964ed9c52dc5bd6127f0dd90ac89791daa690a5665cc01eae185912e1ba", + "sha256:409846be9d6bdcbd78b9e5afe2f64b2da5a923dd7c1cd0615ce589489533fdbb", + "sha256:4907040f62b91c2e170359c3d36c000af783f0fa1516a83d6c1517cde0af5340", + "sha256:6c0543f2fdd38dee631fb023c0f31c284a532d205590b393d72009c14847f5b1", + "sha256:826b9f5fbb7f908a13aa1efd4b7321e36992f5868d5d8311c7b40cf9b11ca0e7", + "sha256:a7695a378c2ce402405ea37b12c7a338a8755e081869bd6b95858893ceb617ae", + "sha256:a84c31e8409b420c3ca57fd30c7589378d6fdc8d155d866a7f8e6e80dec6fd06", + "sha256:adadeeae5500de0da2b9e8dd478520d0a9945b577b2198f2462555e68f58e7ef", + "sha256:b283a76a83fe463c9587a2c88003f800e08c3929dfbeba833b78260f9c209785", + "sha256:c19a7389ab3cd712058a8c3c9ffd8d27a57f3d84b9c91a931f542682bb3d269d", + "sha256:c3bb4bd2aca82fb498247deeac12265921fe231502a6bc6edea3ee7fe6c40a7a", + "sha256:c5ea60ece0c0c1c849025bfc541b60a6751b491b6f11dd9ef37ab5b8c9041921", + "sha256:db61a640ca20f237317d27bc658c1fc54c7581ff7f6502d112922dc285bdabee" + ], + "index": "pypi", + "version": "==1.3.0" + }, + "seaborn": { + "hashes": [ + "sha256:42e627b24e849c2d3bbfd059e00005f6afbc4a76e4895baf44ae23fe8a4b09a5", + "sha256:76c83f794ca320fb6b23a7c6192d5e185a5fcf4758966a0c0a54baee46d41e2f" + ], + "index": "pypi", + "version": "==0.9.0" + }, + "six": { + "hashes": [ + "sha256:3350809f0555b11f552448330d0b52d5f24c91a322ea4a15ef22629740f3761c", + "sha256:d16a0141ec1a18405cd4ce8b4613101da75da0e9a7aec5bdd4fa804d0e0eba73" + ], + "index": "pypi", + "version": "==1.12.0" + }, + "subprocess32": { + "hashes": [ + "sha256:88e37c1aac5388df41cc8a8456bb49ebffd321a3ad4d70358e3518176de3a56b", + "sha256:eb2937c80497978d181efa1b839ec2d9622cf9600a039a79d0e108d1f9aec79d" + ], + "index": "pypi", + "version": "==3.5.4" + } + }, + "develop": {} +} diff --git a/ProteoFileReader.py b/ProteoFileReader.py index 6d7cbb6..dd816cd 100755 --- a/ProteoFileReader.py +++ b/ProteoFileReader.py @@ -13,6 +13,125 @@ import numpy as np import sys import pyopenms as oms 
+import io +import os + + +def read_mgf(mgf_file): + mgf_reader = io.open(mgf_file, "r") + + spectra = [] + peaks = [] + RT, pep_mz, pep_int, charge, scanID, ms2id = -1, -1, -1, -1, -1, -1 + title, detector, fragmethod = "", "", "" + peak_re = re.compile(r'^([0-9.e+\-]+)\s([0-9.e+\-]+)') + + for line in mgf_reader: + if len(line.strip()) == 0: + continue + if re.match(peak_re, line): + mz_int_list = re.match(peak_re, line).groups() + peaks.append([float(x) for x in mz_int_list]) + elif line.startswith("TITLE"): + title = line.replace('TITLE=', '').strip() + # scan_match = re.search("^TITLE=[^.]*.([0-9]+).", line) + # ms2id_scan_match = re.search("ms2_scanId=([0-9]+)", line) + # scanID = int(scan_match.groups()[0]) + # if ms2id_scan_match: + # ms2id = int(ms2id_scan_match.groups()[0]) + # else: + # ms2id = -1 + + elif line.startswith("RTINSECONDS"): + RT = float(re.search("RTINSECONDS=(.*)", line).groups()[0]) + + elif line.startswith("PEPMASS"): + precursor = re.search("PEPMASS=(.*)", line).groups()[0].split() + pep_mz = float(precursor[0]) + try: + pep_int = float(precursor[1]) + except: + pep_int = -1.0 + + elif line.startswith("CHARGE"): + charge = float(re.search("CHARGE=(-?\d)", line).groups()[0]) + + elif line.startswith("DETECTOR"): + detector = re.search("DETECTOR=(.*)", line).groups()[0].strip() + + elif line.startswith("FRAGMETHOD"): + fragmethod = re.search("FRAGMETHOD=(.*)", line).groups()[0].strip() + + elif "END IONS" in line: + spectra.append( + MS2_spectrum(title, RT, pep_mz, pep_int, charge, peaks, detector=detector, fragmethod=fragmethod) + ) + peaks = [] + title, detector, fragmethod = "", "", "" + RT, pep_mz, pep_int, charge, scanID, ms2id = -1, -1, -1, -1, -1, -1 + + return spectra + + +def write_mgf(spectra, outfile): + out_writer = open(os.path.join(outfile), "w") + out_writer.write('MASS=Monoisotopic\n') + for spectrum in spectra: + title = spectrum.getTitle() + # scan = re.search('scan=[0-9]*', title).group(0)[5:] + # try: + # title = re.match('([A-Z])[0-9]{6}_[0-9]{2}.+?( )', title).group(0)[:-1] + # except AttributeError: + # title = re.match('[0-9]{8}_[0-9]{2}.+?( )', title).group(0)[:-1] + # title = '.'.join([title, scan, scan, str(int(spectrum.charge))]) + if 'ms2_scanId' in spectrum.getTitle(): + try: + ms2_parent = re.search('ms2_scanId=.*scan=([0-9]+)', spectrum.getTitle()).groups()[0] + except AttributeError: + ms2_parent = 0 + title += ' ms2_scanId=%s' % ms2_parent + stavrox_mgf = """ +BEGIN IONS +TITLE={} +PEPMASS={} {} +CHARGE={}+ +RTINSECONDS={} +DETECTOR={} +FRAGMETHOD={} +{} +END IONS""".format( + title, + spectrum.getPrecursorMZ(), + spectrum.getPrecursorIntensity() if spectrum.getPrecursorIntensity() > 0 else 0, + int(spectrum.charge), + spectrum.getRT(), + spectrum.getDetector(), + spectrum.getFragMethod(), + "\n".join([f"{mz} {i}" for mz, i in spectrum.peaks if i > 0]) + ) + out_writer.write(stavrox_mgf) + + +def split_mgf_methods(mgf_in_file): + ms2_spectra = read_mgf(mgf_in_file) + + methods = [ + "CID", + "HCD", + "ETD", + "ETciD", + "EThcD" + ] + + for method in methods: + split_spectra = [spectrum for spectrum in ms2_spectra if spectrum.getFragMethod() == method] + + if len(split_spectra) > 0: + out_file_name = '%s_%s' % (method, os.path.split(mgf_in_file)[1]) + out_file_path = os.path.join(os.path.split(mgf_in_file)[0], out_file_name) + + write_mgf(split_spectra, out_file_path) + def mzMLReader(in_file): """ @@ -26,7 +145,8 @@ def mzMLReader(in_file): file = oms.MzMLFile() exp = oms.MSExperiment() file.load(in_file, exp) - return(exp) + 
return exp + class MS2_spectrum(): """ @@ -50,64 +170,71 @@ class MS2_spectrum(): charge array for the peaks """ - def __init__(self, title, RT, pepmass, pepint, charge, peaks, peakcharge=[]): + def __init__(self, title, RT, pepmz, pepint, charge, peaks, peakcharge=[], fragmethod='', detector=''): self.title = title self.RT = RT - self.pepmz = pepmass + self.pepmz = pepmz self.pepint = pepint self.charge = charge self.peaks = peaks self.peakcharge = peakcharge + self.fragMethod = fragmethod + self.detector = detector def getPrecursorMZ(self): """ Returns the precursor mass """ - return(self.pepmz) + return self.pepmz def getPrecursorIntensity(self): """ Returns the precursor intensity """ - return(self.pepint) + return self.pepint def getRT(self): """ Returns the precursor RT """ - return(self.RT) + return self.RT def getTitle(self): """ Returns the precursor mass """ - return(self.title) + return self.title def getPeaks(self): """ Returns the spectrum peaks """ - return(self.peaks) + return self.peaks def getMZ(self): """ Returns the mz of the MS2 """ - return(self.peaks[:,0]) + return self.peaks[:, 0] def getIntensities(self): """ Returns the MS2 peak intensities """ - return(self.peaks[:,1]) + return self.peaks[:, 1] def getUnchargedMass(self): """ Computs the uncharged mass of a fragment: - uncharged_mass = (mz * z ) - z - TODO: fix Hydrogen mass! + uncharged_mass = (mz - hydrogen mass) * z """ - return( (self.pepmass * self.charge) - self.charge) + return (self.pepmz - 1.007276466879) * self.charge + + def getFragMethod(self): + return self.fragMethod + + def getDetector(self): + return self.detector def printf(self): print ("Title, RT, PEPMASS, PEPINT, CHARGE") @@ -117,7 +244,7 @@ def to_mgf(self): # need dummy values in case no peak charges are in the data if len(self.peakcharge) == 0: self.peakcharge = [""]*self.peaks.shape[0] - mgf_str=""" + mgf_str = """ BEGIN IONS TITLE=%s RTINSECONDS=%s @@ -126,7 +253,7 @@ def to_mgf(self): %s END IONS """ % (self.title, self.RT, self.pepmz, self.pepint, self.charge, "\r\n".join(["%s %s %s" % (i[0], i[1], j, ) for i,j in zip(self.peaks, self.peakcharge)])) - return(mgf_str) + return mgf_str #============================================================================== @@ -343,6 +470,6 @@ def store(self, out_file, ms_list): charge=%s %s peaklist end -""" % (ms.title, ms.pepmass, ms.charge, "\r\n".join(["%s %s" % (i, j ) for i,j in ms.peaks])) +""" % (ms.title, ms.pepmass, ms.charge, "\r\n".join(["%s %s" % (i, j) for i,j in ms.peaks])) out_mgf.write(mgf_str) out_mgf.close() diff --git a/gui.py b/gui.py deleted file mode 100755 index 36ad1b8..0000000 --- a/gui.py +++ /dev/null @@ -1,39 +0,0 @@ -import os -import preprocessing -from multiprocessing import Pool -from functools import partial -import sys - -sys.path.append('D:/software/wxpython/wx-3.0-msw') -from gooey import Gooey, GooeyParser - - -@Gooey( - program_description="Converts raw files to mgfs whith optional MS2 denoising.", - default_size=(610, 400)) -# if __name__ == '__main__': -def main(): - parser = GooeyParser() - parser.add_argument('input', widget="DirChooser") - parser.add_argument('output', widget="DirChooser") - parser.add_argument('config', widget="FileChooser") - - args = parser.parse_args() - - full_paths = [os.path.join(args.input, rawfile) for rawfile in os.listdir(args.input)] - full_paths = [x for x in full_paths if not os.path.isdir(x)] - - if args.input == args.output: - args.output = os.path.join(args.output, 'processed') - - execfile(args.config, globals()) - - 
pool = Pool(processes=int(nthr)) - pool.map(partial(preprocessing.process_file, outdir=args.output, mscon_settings=mscon_settings, split_acq=split_acq, - detector_filter=detector_filter, mscon_exe=msconvert_exe), full_paths) - pool.close() - pool.join() - - -if __name__ == '__main__': - main() diff --git a/mass_recal.py b/mass_recal.py index b159e78..466ad2e 100755 --- a/mass_recal.py +++ b/mass_recal.py @@ -5,7 +5,6 @@ import numpy as np import pandas as pd import ProteoFileReader -import sys def xi_wrapper(arguments): @@ -51,7 +50,7 @@ def get_ppm_error(xi_df, outfile): err = input('Enter error to correct by (0 for no correction):\n') try: err = float(err) - if (err != 0): + if err != 0: return err elif err == 0: return 0 @@ -61,50 +60,22 @@ def get_ppm_error(xi_df, outfile): return median_err -def adjust_prec_mz(mgf_file, error, outpath): - outfile = os.path.join(outpath, 'recal_' + os.path.split(mgf_file)[1]) - if not os.path.exists(outpath): - os.makedirs(outpath) +def adjust_prec_mz(mgf_file, error, out_path): + outfile = os.path.join(out_path, 'recal_' + os.path.split(mgf_file)[1]) + if not os.path.exists(out_path): + os.makedirs(out_path) elif os.path.isfile(outfile): - return - exp = ProteoFileReader.MGF_Reader() - exp.load(mgf_file) - - out_writer = open(os.path.join(outfile), "w") - for spectrum in exp: - prec_mz_new = spectrum.getPrecursorMass()/(1-error/10.**6) - if sys.version_info.major < 3: - stavrox_mgf = """ -MASS=Monoisotopic -BEGIN IONS -TITLE={} -PEPMASS={} {} -CHARGE={}+ -RTINSECONDS={} -{} -END IONS """.format(spectrum.getTitle(), - prec_mz_new, spectrum.getPrecursorIntensity() if spectrum.getPrecursorIntensity() > 0 else 0, - int(spectrum.charge), spectrum.getRT(), - "\n".join(["%s %s" % (i[0], i[1]) for i in spectrum.peaks if i[1] > 0])) - else: - stavrox_mgf = """ -MASS=Monoisotopic -BEGIN IONS -TITLE={} -PEPMASS={} {} -CHARGE={}+ -RTINSECONDS={} -{} -END IONS """.format(spectrum.getTitle(), - prec_mz_new, - spectrum.getPrecursorIntensity() if spectrum.getPrecursorIntensity() > 0 else 0, - int(spectrum.charge), spectrum.getRT(), - "\n".join(["%s %s" % (mz, spectrum.peaks[1][i]) for i, mz in enumerate(spectrum.peaks[0]) if - spectrum.peaks[1][i] > 0])) - out_writer.write(stavrox_mgf) - - -def main(mgf, fasta, xi_cnf, outpath, threads, xi_jar='./resources/XiSearch_1.6.739.jar', val_input=None): + raise Exception('File %s already exists!' % outfile) + + ms2_spectra = ProteoFileReader.read_mgf(mgf_file) + + for spectrum in ms2_spectra: + spectrum.pepmz = spectrum.getPrecursorMZ() / (1 - error / 10.0 ** 6) + + ProteoFileReader.write_mgf(ms2_spectra, outfile) + + +def main(mgf, fasta, xi_cnf, outpath, threads, xi_jar='./resources/XiSearch_1.6.745.jar', val_input=None): if not os.path.exists(outpath): os.makedirs(outpath) @@ -124,8 +95,8 @@ def main(mgf, fasta, xi_cnf, outpath, threads, xi_jar='./resources/XiSearch_1.6. 
ms1_input = pd.read_csv(val_input, header=None, index_col=0) ms1_err = ms1_input[ms1_input.index.str.contains('_'.join(filename.split('_')[1:]))].values[0][0] - if ms1_err is not None: # shift all old m/z by value - adjust_prec_mz(mgf_file=mgf, error=ms1_err, outpath=os.path.join(outpath)) + if ms1_err is not None: # shift all old m/z by value + adjust_prec_mz(mgf_file=mgf, error=ms1_err, out_path=os.path.join(outpath)) if __name__ == '__main__': diff --git a/mass_recal_ms2.py b/mass_recal_ms2.py new file mode 100755 index 0000000..32c0a44 --- /dev/null +++ b/mass_recal_ms2.py @@ -0,0 +1,127 @@ +import os +import subprocess +import matplotlib.pyplot as plt +import seaborn as sns +import numpy as np +import pandas as pd +import ProteoFileReader + + +def xi_wrapper(arguments): + xi = subprocess.Popen(arguments) + xi.communicate() + return + + +def run_xi_lin(peakfile, fasta, cnf, outpath, xipath, threads='1'): + if not os.path.exists(outpath): + os.makedirs(outpath) + elif os.path.isfile(outpath + '/xi_' + os.path.split(peakfile)[1].replace('.mgf', '.csv')): + return + + xi_cmds = ['java', '-cp', os.path.join(os.path.dirname(os.path.realpath(__file__)), xipath), + 'rappsilber.applications.Xi', # + '/fastutil-8.1.0.jar;' + xipath + '/XiSearch.jar' + '--fasta=' + fasta, + '--xiconf=UseCPUs:' + threads, + '--peaks=' + peakfile, + '--config=' + cnf, + '--output=' + outpath + '/xi_' + os.path.split(peakfile)[1].replace('.mgf', '.csv'), + '--peaksout=%s_peaks.csv.gz' % peakfile[:len(peakfile) - 4]] + + print('calling ' + subprocess.list2cmdline(xi_cmds)) + xi = subprocess.Popen(xi_cmds) #, stdout=subprocess.PIPE, stderr=subprocess.PIPE) + xi.communicate() + + +def get_ppm_error(xi_df, peaks_df, outfile): + xi_df = xi_df[(xi_df.decoy == 0) & (xi_df['match score'] > 6)] + median_err = np.median(xi_df['Precoursor Error']) + try: + fig, ax = plt.subplots() + sns.distplot(xi_df['Precoursor Error'], norm_hist=False, kde=False) + ax.axvline(median_err) + plt.savefig(outfile) + plt.close() + except ZeroDivisionError: + print(xi_df['Precoursor Error'][:5]) + + if len(xi_df) < 75: + print(os.path.split(outfile)[1] + ': Only %s PSMs found. Median error is %s.' % (len(xi_df), median_err)) + err = input('Enter error to correct by (0 for no correction):\n') + try: + # err = float(err) + if err != '0': + return float(err), 0 + elif err == '0': + return 0, 0 + except ValueError: + return 0, 0 + + xi_ms2_df = peaks_df[peaks_df["IsPrimaryMatch"] == 1] + xi_ms2_df["MS2Error_ppm"] = (xi_ms2_df["MS2Error"] * 10. ** 6) / xi_ms2_df["CalcMZ"] + xi_ms2_df = xi_ms2_df.merge(xi_df[['Scan', 'Run', 'decoy']], + left_on=['ScanNumber', 'Run'], right_on=['Scan', 'Run'], how='inner') + xi_ms2_df = xi_ms2_df[(xi_ms2_df["MS2Error_ppm"] <= 30) & (xi_ms2_df["MS2Error_ppm"] >= -30)] + median_err_ms2 = np.median(xi_ms2_df["MS2Error_ppm"]) + + fig, ax = plt.subplots() + sns.distplot(xi_ms2_df["MS2Error_ppm"], norm_hist=False, kde=False) + ax.axvline(median_err_ms2) + plt.xlabel("mass error") + plt.title("MS2 Error distribution \n median: " + str(median_err_ms2)) + plt.ylabel("# of identifications") + plt.xlim(-20, 20) + plt.savefig(os.path.join(outfile.replace('MS1', "MS2"))) + plt.close() + + return median_err, median_err_ms2 + + +def adjust_prec_mz(mgf_file, ms1_error, ms2_error, outpath): + outfile = os.path.join(outpath, 'recal_' + os.path.split(mgf_file)[1]) + if not os.path.exists(outpath): + os.makedirs(outpath) + elif os.path.isfile(outfile): + raise Exception('File %s already exists!' 
% outfile) + ms2_spectra = ProteoFileReader.read_mgf(mgf_file) + + for spectrum in ms2_spectra: + # ms1/precursor correction + spectrum.pepmz = spectrum.getPrecursorMZ() / (1 + ms1_error / 10.0 ** 6) # TODO wrong sign if newer version + + # ms2 peak correction + ms2_peaks = spectrum.getPeaks() + for i in range(0, len(ms2_peaks)): + ms2_peaks[i][0] = ms2_peaks[i][0] / (1 + ms2_error / 10. ** 6) + + spectrum.peaks = ms2_peaks + + ProteoFileReader.write_mgf(ms2_spectra, outfile) + + +def main(mgf, fasta, xi_cnf, outpath, threads, xi_jar='./resources/XiSearch_1.6.745.jar', val_input=None): + if not os.path.exists(outpath): + os.makedirs(outpath) + + filename = os.path.split(mgf)[1] + if val_input is None: + # linear small search in Xi + run_xi_lin(peakfile=mgf, fasta=fasta, cnf=xi_cnf, outpath=os.path.join(outpath), xipath=xi_jar, threads=threads) + + xi_df = pd.read_csv(os.path.join(outpath, 'xi_' + filename.replace('.mgf', '.csv'))) + peaks_df = pd.read_csv(os.path.join(outpath, filename.replace('.mgf', '_peaks.csv.gz')), + sep='\t', index_col=False, thousands=',') + # evaluate results, get median ms1 error + ms1_err, ms2_err = get_ppm_error(xi_df=xi_df, peaks_df=peaks_df, + outfile=os.path.join(outpath, 'MS1_err_' + filename + '.png')) + + error_file = open(outpath + '/ms1_err.csv', 'a') + error_file.write(filename + ',' + str(ms1_err) + '\n') + error_file.close() + else: + ms1_input = pd.read_csv(val_input, header=None, index_col=0) + ms1_err = ms1_input[ms1_input.index.str.contains('_'.join(filename.split('_')[1:]))].values[0][0] + ms2_err = 0 # ToDo val input for ms2 error? + + if ms1_err is not None: # shift all old m/z by value + adjust_prec_mz(mgf_file=mgf, ms1_error=ms1_err, ms2_error=ms2_err, outpath=os.path.join(outpath)) diff --git a/mass_trace.py b/mass_trace.py index 0fe4530..a14b889 100755 --- a/mass_trace.py +++ b/mass_trace.py @@ -84,7 +84,7 @@ def extend_mass_mz(exp, MS1scan, mz, seed_scan, scans_masstrace, currentcount = 0 while RTdiff <= RTdiff_max: currentcount += 1 - if currentcount >= maxcount: + if currentcount >= maxcount or (MS1scan + scan_it <= 1): # print ("max nscan diff reached") # TODO: check if really sensible (data I checked seemed ok for a bit over 200) break scan_it += constant diff --git a/mps_ms1.py b/mps_ms1.py index 00eb334..9e62d4f 100755 --- a/mps_ms1.py +++ b/mps_ms1.py @@ -1,234 +1,234 @@ -import os -import pyopenms as oms -import numpy as np -from ProteoFileReader import MGF_Reader -import mass_trace -from joblib import Parallel, delayed -import re - - -def add_relaxation_mgf(mgf, mps, outfile, create_comparison=False): - # mgf_file, outdir, differences = argls[0], argls[1], argls[2] - mass_diff = 1.00335483 - # if '.mgf' in args['file']: - filename = os.path.split(mgf)[1] - # read mgf - spectra = MGF_Reader() - spectra.load(mgf) - out_writer = open(outfile, "w") - for spectrum in spectra: - # calculate mass (neglect proton bec. 
later on difference used) - regex_match = re.search('(scan=)[0-9]*', spectrum.getTitle()) - if regex_match is not None: - scan = int(regex_match.group(0).split('scan=')[1]) - else: - scan = int(spectrum.getTitle().split('.')[-2]) - # scan = int(spectrum.getTitle().split('.')[-2]) - # try: - differences = [0, 1, 2, 3, 4] #[0, -1, -2, -3, -4] - if not create_comparison: - row = mps[mps[:, 0] == scan, 1:] - if len(row) == 1: - differences = [-i for i in range(len(row[0])) if row[0][i] == 1] - elif len(row) > 1: - raise ValueError('multiple matches to scan %s' % scan) - else: - print 'scan %s not found' % scan - - mass = spectrum.getPrecursorMass() * spectrum.charge - spectra_add_mip = [str((mass + x * mass_diff) / spectrum.charge) for x in differences if x != 0] - if 0 in differences: - prec_mz = spectrum.getPrecursorMass() - else: - prec_mz = spectra_add_mip[0] - spectra_add_mip = spectra_add_mip[1:] - # except KeyError: - # differences = [-2, -1, 0] - - stavrox_mgf = """ -MASS=Monoisotopic -BEGIN IONS -TITLE={} -PEPMASS={} {} -CHARGE={}+ -RTINSECONDS={} -ADDITIONALMZ={} -{} -END IONS """.format(spectrum.getTitle(), prec_mz, - spectrum.getPrecursorIntensity() if spectrum.getPrecursorIntensity() > 0 else 0, - int(spectrum.charge), spectrum.getRT(), - ';'.join(spectra_add_mip), - "\r".join(["%s %s" % (i[0], i[1]) for i in spectrum.peaks])) - out_writer.write(stavrox_mgf) - - -def read_mzml(infile): - # init variables - mzml_file = oms.MzMLFile() - exp = oms.MSExperiment() - - # load spectra into exp - mzml_file.load(infile, exp) - return (exp) - - -def get_error(mz1, mz2, charge=None, ppm=True): - if ppm: - return abs(mz1 - mz2) / mz2 * 1e6 - else: - if charge is not None: - return (mz1 - mz2) * charge - - -def return_mps_range(intensity, mps_max): - if False: #intensity >= 10e6: - return [-1] - else: - return list(range(-1, mps_max - 1, -1)) - - -def ms1_peaks(exp, tolerance=6, mps_range=[-1, -2, -3, -4]): # mps_range=[-1, -2, -3, -4] - # loop through spectra and make count if precursor in MS2 spectrum - peaks_found = [] - n_ms2 = 0 - nspectra = exp.size() - for i, spectrum in enumerate(exp): - - if spectrum.getMSLevel() == 1: - continue - n_ms2 += 1 - if i % 10000 == 0: - print "{}/{} done..".format(i, nspectra) - - # if iidone not in matched_scans: - # continue - precursor = spectrum.getPrecursors()[0] - prec_mz, prec_charge = precursor.getMZ(), precursor.getCharge() - MS1scan = mass_trace.find_parent_MS1_scan(exp, i) - ppm_pseudo = 10 # taken from Svens script, apparently not used in function - mz_trace, scans_trace = mass_trace.extract_mass_trace(exp, MS1scan, prec_mz, prec_charge, ppm_pseudo, tolerance, 10) # test with 20 to see difference - if len(scans_trace) == 1: - tmp_ms1 = exp[MS1scan] # TODO find out why mass_trace does not find anything - else: - # try: - best_isotope_seed = np.argmax(mz_trace[:, 1]) - # except: - # pass - best_seed_spectrum = scans_trace[best_isotope_seed] - - # ms1_prev = 0 - # for j in range(i, 0, -1): - tmp_ms1 = exp[best_seed_spectrum] - - res = tmp_ms1[tmp_ms1.findNearest(prec_mz)] - if abs(res.getMZ() - prec_mz) / prec_mz <= tolerance: - prec_int = res.getIntensity() - # mps_range = return_mps_range(prec_int, mps_max) - # if prec_int >= 3e6: - # peaks_found.append([i + 1, True] + [False] * len(mps_range)) - # continue - # else: - # peaks_found.append([i + 1, True] + [True] * len(mps_range)) - # continue - theo_mip = np.array([prec_mz + (mip_i * 1.00335483) / prec_charge for mip_i in mps_range]) - mip_nearest = [tmp_ms1.findNearest(x) for x in theo_mip] - 
error = np.array([get_error(tmp_ms1[mea].getMZ(), expi, ppm=True) for expi, mea in - zip(theo_mip, mip_nearest)]) - range_found = [True if x <= tolerance else False for x in error] - if sum(range_found) == 0: - # TODO: try if sensible to not mps search these - if len(mps_range) > 2: - peaks_found.append( - # [i + 1, True] + [True] * len(mps_range) - # [i + 1, True, True, True] + [False] * (len(mps_range) - 2) - # [i + 1, True, True, True, True, False] - [i + 1, True] + [True] * (len(mps_range) - 1) + [False] - ) - else: - peaks_found.append( - [i + 1, True] + [True] * len(mps_range) - ) - continue - else: - # check for continous peaks except -1 peak - # TODO: allow gap? - found = False - lightest_peak = 1 - for i_mip in range(len(range_found), 1, -1): - if range_found[i_mip - 1] & (i_mip > lightest_peak): - lightest_peak = i_mip - if sum(range_found[:i_mip]) == len(range_found[:i_mip]): - # if i_mip == len(range_found): - # sel = [False] * (i_mip - 2) + range_found[i_mip - 2:] - # else: - # sel = [False] * (i_mip - 2) + [True] * 3 + [False] * (len(range_found) - 1 - i_mip) # 2 - # takes lightest 2 continous + existing lighter peaks, excludes heaviar - sel = [False] * (i_mip - 2) + range_found[i_mip - 2:] # 2 - peaks_found.append([i + 1, False] + sel) - found = True - break - # if no continous found take all - if not found: - if not lightest_peak == len(range_found): - peaks_found.append( - # [i + 1, True] + [True] * len(mps_range) - # [i + 1, True] + [True] * (lightest_peak + 1) + [False] * (len(range_found) - lightest_peak - 1) - [i + 1, True] + [True] * (lightest_peak) + [False] * (len(range_found) - lightest_peak) - ) - else: - peaks_found.append([i + 1, True] + [True] * len(mps_range)) - continue - - else: - print 'Precursor not found' - continue - - return np.array(peaks_found) - - -def main(mzmlfile, exp_id, setting, infoout_dir, mgf_in_dir, mgf_out_dir): - exp = read_mzml(mzmlfile) - # exp_id = mzml_file[:10] - - mps_df = ms1_peaks(exp) - np.savetxt(infoout_dir + '/%s_%s.csv' % (setting, exp_id), mps_df, delimiter=',') - corresponding_mgf = [x for x in os.listdir(mgf_in_dir) if exp_id in x][0] - add_relaxation_mgf(mgf=mgf_in_dir + corresponding_mgf, mps=mps_df, - outfile=mgf_out_dir + '/%s_' % setting + corresponding_mgf) - -if __name__ == '__main__': - isotope_diff = 1.00335483 - # mzml_dir = 'D:/user/Swantje/data/PC/mzML/' - # chaet_dir = 'fr7-10' - # mzml_dir = 'D:/user/Swantje/data/Chaetomium/frac7_10/mzML/' - mgf_filtered_dir = 'D:/user/Swantje/projects/pipeline_prepro_xi_fdr/lars_PC_4frag_BS3_Lumos/All_prepro_peakfiles/mscon_PF_20_100_0/' - # mgf_filtered_dir = 'D:/user/Swantje/projects/pipeline_prepro_xi_fdr/chaetomium/%s/All_prepro_peakfiles/mscon_PF_20/' % chaet_dir - setting_name = 'decoy_pos4_only' - mgf_out = 'D:/user/Swantje/projects/pipeline_prepro_xi_fdr/lars_PC_4frag_BS3_Lumos/All_prepro_peakfiles/' + setting_name - info_out = 'D:/user/Swantje/projects/pipeline_prepro_xi_fdr/lars_PC_4frag_BS3_Lumos/relaxation_tbls/' + setting_name - # mgf_out = 'D:/user/Swantje/projects/pipeline_prepro_xi_fdr/chaetomium/%s/All_prepro_peakfiles/' % chaet_dir + setting_name - # info_out = 'D:/user/Swantje/projects/pipeline_prepro_xi_fdr/chaetomium/%s/relaxation_tbls/' % chaet_dir + setting_name - - if not os.path.exists(mgf_out): - os.makedirs(mgf_out) - if not os.path.exists(info_out): - os.makedirs(info_out) - - # mzmls_in = [x for x in os.listdir(mzml_dir) if '.mzML' in x] - # Parallel(n_jobs=4)(delayed(main)(mzml_dir + x, x[:10], setting_name, info_out, mgf_filtered_dir, - # 
mgf_out) for x in mzmls_in) - # for x in mzmls_in: - # main(mzml_dir + x, x[:10], setting_name, info_out, mgf_filtered_dir, mgf_out) - # for mzml_file in [x for x in os.listdir(mzml_dir) if '.mzML' in x]: - # exp = read_mzml(mzml_dir + mzml_file) - # exp_id = mzml_file[:10] - # - # mps_df = ms1_peaks(exp) - # np.savetxt(info_out + '/%s_%s.csv' % (setting_name, mzml_file), mps_df, delimiter=',') - # corresponding_mgf = [x for x in os.listdir(mgf_filtered_dir) if exp_id in x][0] - # add_relaxation_mgf(mgf=mgf_filtered_dir + corresponding_mgf, mps=mps_df, - # outfile=mgf_out + '/%s_' % setting_name + corresponding_mgf) - - for mgf_file in os.listdir(mgf_filtered_dir): - add_relaxation_mgf(mgf=mgf_filtered_dir + mgf_file, mps=[], create_comparison=True, - outfile='D:/user/Swantje/projects/pipeline_prepro_xi_fdr/lars_PC_4frag_BS3_Lumos/All_prepro_peakfiles/' + setting_name + '_' + mgf_file) +import os +import pyopenms as oms +import numpy as np +from ProteoFileReader import MGF_Reader +import mass_trace +from joblib import Parallel, delayed +import re + + +def add_relaxation_mgf(mgf, mps, outfile, create_comparison=False): + # mgf_file, outdir, differences = argls[0], argls[1], argls[2] + mass_diff = 1.00335483 + # if '.mgf' in args['file']: + filename = os.path.split(mgf)[1] + # read mgf + spectra = MGF_Reader() + spectra.load(mgf) + out_writer = open(outfile, "w") + for spectrum in spectra: + # calculate mass (neglect proton bec. later on difference used) + regex_match = re.search('(scan=)[0-9]*', spectrum.getTitle()) + if regex_match is not None: + scan = int(regex_match.group(0).split('scan=')[1]) + else: + scan = int(spectrum.getTitle().split('.')[-2]) + # scan = int(spectrum.getTitle().split('.')[-2]) + # try: + differences = [0, -1, -2, -3, -4] + if not create_comparison: + row = mps[mps[:, 0] == scan, 1:] + if len(row) == 1: + differences = [-i for i in range(len(row[0])) if row[0][i] == 1] + elif len(row) > 1: + raise ValueError('multiple matches to scan %s' % scan) + else: + print 'scan %s not found' % scan + + mass = spectrum.getPrecursorMZ() * spectrum.charge # ToDo: correct? talk to SL -protonMass missing! 
+ spectra_add_mip = [str((mass + x * mass_diff) / spectrum.charge) for x in differences if x != 0] + if 0 in differences: + prec_mz = spectrum.getPrecursorMZ() + else: + prec_mz = spectra_add_mip[0] + spectra_add_mip = spectra_add_mip[1:] + # except KeyError: + # differences = [-2, -1, 0] + + stavrox_mgf = """ +MASS=Monoisotopic +BEGIN IONS +TITLE={} +PEPMASS={} {} +CHARGE={}+ +RTINSECONDS={} +ADDITIONALMZ={} +{} +END IONS """.format(spectrum.getTitle(), prec_mz, + spectrum.getPrecursorIntensity() if spectrum.getPrecursorIntensity() > 0 else 0, + int(spectrum.charge), spectrum.getRT(), + ';'.join(spectra_add_mip), + "\r".join(["%s %s" % (i[0], i[1]) for i in spectrum.peaks])) + out_writer.write(stavrox_mgf) + + +def read_mzml(infile): + # init variables + mzml_file = oms.MzMLFile() + exp = oms.MSExperiment() + + # load spectra into exp + mzml_file.load(infile, exp) + return (exp) + + +def get_error(mz1, mz2, charge=None, ppm=True): + if ppm: + return abs(mz1 - mz2) / mz2 * 1e6 + else: + if charge is not None: + return (mz1 - mz2) * charge + + +def return_mps_range(intensity, mps_max): + if False: #intensity >= 10e6: + return [-1] + else: + return list(range(-1, mps_max - 1, -1)) + + +def ms1_peaks(exp, tolerance=6, mps_range=[-1, -2, -3, -4]): # mps_range=[-1, -2, -3, -4] + # loop through spectra and make count if precursor in MS2 spectrum + peaks_found = [] + n_ms2 = 0 + nspectra = exp.size() + for i, spectrum in enumerate(exp): + + if spectrum.getMSLevel() == 1: + continue + n_ms2 += 1 + if i % 10000 == 0: + print "{}/{} done..".format(i, nspectra) + + # if iidone not in matched_scans: + # continue + precursor = spectrum.getPrecursors()[0] + prec_mz, prec_charge = precursor.getMZ(), precursor.getCharge() + MS1scan = mass_trace.find_parent_MS1_scan(exp, i) + ppm_pseudo = 10 # taken from Svens script, apparently not used in function + mz_trace, scans_trace = mass_trace.extract_mass_trace(exp, MS1scan, prec_mz, prec_charge, ppm_pseudo, tolerance, 10) # test with 20 to see difference + if len(scans_trace) == 1: + tmp_ms1 = exp[MS1scan] # TODO find out why mass_trace does not find anything + else: + # try: + best_isotope_seed = np.argmax(mz_trace[:, 1]) + # except: + # pass + best_seed_spectrum = scans_trace[best_isotope_seed] + + # ms1_prev = 0 + # for j in range(i, 0, -1): + tmp_ms1 = exp[best_seed_spectrum] + + res = tmp_ms1[tmp_ms1.findNearest(prec_mz)] + if abs(res.getMZ() - prec_mz) / prec_mz <= tolerance: + prec_int = res.getIntensity() + # mps_range = return_mps_range(prec_int, mps_max) + # if prec_int >= 3e6: + # peaks_found.append([i + 1, True] + [False] * len(mps_range)) + # continue + # else: + # peaks_found.append([i + 1, True] + [True] * len(mps_range)) + # continue + theo_mip = np.array([prec_mz + (mip_i * 1.00335483) / prec_charge for mip_i in mps_range]) + mip_nearest = [tmp_ms1.findNearest(x) for x in theo_mip] + error = np.array([get_error(tmp_ms1[mea].getMZ(), expi, ppm=True) for expi, mea in + zip(theo_mip, mip_nearest)]) + range_found = [True if x <= tolerance else False for x in error] + if sum(range_found) == 0: + # TODO: try if sensible to not mps search these + if len(mps_range) > 2: + peaks_found.append( + # [i + 1, True] + [True] * len(mps_range) + # [i + 1, True, True, True] + [False] * (len(mps_range) - 2) + # [i + 1, True, True, True, True, False] + [i + 1, True] + [True] * (len(mps_range) - 1) + [False] + ) + else: + peaks_found.append( + [i + 1, True] + [True] * len(mps_range) + ) + continue + else: + # check for continous peaks except -1 peak + # TODO: allow 
gap? + found = False + lightest_peak = 1 + for i_mip in range(len(range_found), 1, -1): + if range_found[i_mip - 1] & (i_mip > lightest_peak): + lightest_peak = i_mip + if sum(range_found[:i_mip]) == len(range_found[:i_mip]): + # if i_mip == len(range_found): + # sel = [False] * (i_mip - 2) + range_found[i_mip - 2:] + # else: + # sel = [False] * (i_mip - 2) + [True] * 3 + [False] * (len(range_found) - 1 - i_mip) # 2 + # takes lightest 2 continous + existing lighter peaks, excludes heaviar + sel = [False] * (i_mip - 2) + range_found[i_mip - 2:] # 2 + peaks_found.append([i + 1, False] + sel) + found = True + break + # if no continous found take all + if not found: + if not lightest_peak == len(range_found): + peaks_found.append( + # [i + 1, True] + [True] * len(mps_range) + # [i + 1, True] + [True] * (lightest_peak + 1) + [False] * (len(range_found) - lightest_peak - 1) + [i + 1, True] + [True] * (lightest_peak) + [False] * (len(range_found) - lightest_peak) + ) + else: + peaks_found.append([i + 1, True] + [True] * len(mps_range)) + continue + + else: + print 'Precursor not found' + continue + + return np.array(peaks_found) + + +def main(mzmlfile, exp_id, setting, infoout_dir, mgf_in_dir, mgf_out_dir): + exp = read_mzml(mzmlfile) + # exp_id = mzml_file[:10] + + mps_df = ms1_peaks(exp) + np.savetxt(infoout_dir + '/%s_%s.csv' % (setting, exp_id), mps_df, delimiter=',') + corresponding_mgf = [x for x in os.listdir(mgf_in_dir) if exp_id in x][0] + add_relaxation_mgf(mgf=mgf_in_dir + corresponding_mgf, mps=mps_df, + outfile=mgf_out_dir + '/%s_' % setting + corresponding_mgf) + +if __name__ == '__main__': + isotope_diff = 1.00335483 + # mzml_dir = 'D:/user/Swantje/data/PC/mzML/' + # chaet_dir = 'fr7-10' + mzml_dir = 'D:/user/Swantje/dsso/myco_prepro/1c_remaining_frac/test_set/mzml/' + mgf_filtered_dir = '//130.149.167.198/rappsilbergroup/users/lswantje/dsso_opt/searches/mycoplasma_opt/data/filtered/' + # mgf_filtered_dir = 'D:/user/Swantje/projects/pipeline_prepro_xi_fdr/chaetomium/%s/All_prepro_peakfiles/mscon_PF_20/' % chaet_dir + setting_name = 'mpsreductionfilt' + mgf_out = '//130.149.167.198/rappsilbergroup/users/lswantje/dsso_opt/searches/mycoplasma_opt/data/' + setting_name + info_out = '//130.149.167.198/rappsilbergroup/users/lswantje/dsso_opt/searches/mycoplasma_opt/' + setting_name + # mgf_out = 'D:/user/Swantje/projects/pipeline_prepro_xi_fdr/chaetomium/%s/All_prepro_peakfiles/' % chaet_dir + setting_name + # info_out = 'D:/user/Swantje/projects/pipeline_prepro_xi_fdr/chaetomium/%s/relaxation_tbls/' % chaet_dir + setting_name + + if not os.path.exists(mgf_out): + os.makedirs(mgf_out) + if not os.path.exists(info_out): + os.makedirs(info_out) + + mzmls_in = [x for x in os.listdir(mzml_dir) if '.mzML' in x] + # Parallel(n_jobs=4)(delayed(main)(mzml_dir + x, x[:10], setting_name, info_out, mgf_filtered_dir, + # mgf_out) for x in mzmls_in) + for x in mzmls_in: + main(mzml_dir + x, x[:10], setting_name, info_out, mgf_filtered_dir, mgf_out) + for mzml_file in [x for x in os.listdir(mzml_dir) if '.mzML' in x]: + exp = read_mzml(mzml_dir + mzml_file) + exp_id = mzml_file[:10] + + mps_df = ms1_peaks(exp) + np.savetxt(info_out + '/%s_%s.csv' % (setting_name, mzml_file), mps_df, delimiter=',') + corresponding_mgf = [x for x in os.listdir(mgf_filtered_dir) if exp_id in x][0] + add_relaxation_mgf(mgf=mgf_filtered_dir + corresponding_mgf, mps=mps_df, + outfile=mgf_out + '/%s_' % setting_name + corresponding_mgf) + + # for mgf_file in os.listdir(mgf_filtered_dir): + # 
add_relaxation_mgf(mgf=mgf_filtered_dir + mgf_file, + # outfile='D:/user/Swantje/projects/pipeline_prepro_xi_fdr/lars_PC_4frag_BS3_Lumos/All_prepro_peakfiles/' + setting_name + '_' + mgf_file) diff --git a/preprocessing.py b/preprocessing.py index 6ec08f2..5e82ddd 100755 --- a/preprocessing.py +++ b/preprocessing.py @@ -1,5 +1,4 @@ import os -import numpy as np import subprocess from multiprocessing import Pool import sys @@ -8,22 +7,26 @@ from pyteomics import mzml from functools import partial import ProteoFileReader +import mass_recal_ms2 import mass_recal -import zipfile def read_cmdline(): try: - opts, args = getopt.getopt(sys.argv[1:], '', ['input=', 'config=', 'outpath=', 'db=', 'xiconf=', 'shiftcsv=', 'skip_recal=']) + opts, args = getopt.getopt(sys.argv[1:], '', ['input=', 'config=', 'outpath=', 'db=', 'xiconf=', 'shiftcsv=', + 'skip_recal=', 'skip_ms2_recal=']) except getopt.GetoptError: print('preprocessing.py --input ' '--outpath ' '--config ' '--db ' '--xiconf ', - '--shiftcsv --skip_recal ') + '--shiftcsv ', + '--skip_recal ', + '--skip_ms2_recal ') sys.exit() recal = True + ms2recal = True recal_conf = {} for opt, arg in opts: if opt == '--input': @@ -40,6 +43,8 @@ def read_cmdline(): recal_conf['shift_csv'] = arg elif opt == '--skip_recal': recal = False + elif opt == '--skip_ms2_recal': + ms2recal = False if 'input_arg' not in locals() or 'config' not in locals(): print('preprocessing.py --input ' @@ -66,32 +71,27 @@ def read_cmdline(): '--shiftcsv --skip_recal ') sys.exit() - return input_arg, outdir, config, recal_conf, recal + return input_arg, outdir, config, recal_conf, recal, ms2recal -def split_mzml(mzml_file, detector="all"): +def mzml_to_MS2_spectra(mzml_file, detector_filter="all"): """ - function to split a mzML file into dict of MS2_Spectra objects (can be written to mgf format) - by fragmentation method + function to split a mzML file into a list of MS2_Spectra objects (can be written to mgf format) + with fragmentation method and detector type Parameters: ----------------------------------------- mzml_file: str, path to mzML file + detector_filter: filter scans by detector type ('all', 'FT', 'IT') - Return: dict {fragMethod: list(MS2_spectrum) + Return: list(MS2_spectrum) """ mzml_reader = mzml.read(mzml_file) - ordered_ms2_spectra = { - "CID": [], - "HCD": [], - "ETD": [], - "ETciD": [], - "EThcD": [], - "unknown": [] - } + sorted_ms2_spectra = [] + unknown_frag_method_count = 0 n = 0 for spectrum in mzml_reader: @@ -102,10 +102,10 @@ def split_mzml(mzml_file, detector="all"): detector_str = re.search("^(FT|IT)", filter_str).groups()[0] frag_groups = re.findall("@([A-z]+)([0-9.]+)", filter_str) except AttributeError: - raise StandardError("filter string parse error: %s" % filter_str) + raise Exception("filter string parse error: %s" % filter_str) - if not detector == "all": - if not detector == detector_str: + if not detector_filter == "all": + if not detector_filter == detector_str: continue title = os.path.split(mzml_file)[1].split('.mzML')[0] + " " + spectrum['id'] @@ -119,27 +119,40 @@ def split_mzml(mzml_file, detector="all"): pre_z = precursor['charge state'] peaks = zip(spectrum['m/z array'], spectrum['intensity array']) - ms2class_spectrum = ProteoFileReader.MS2_spectrum(title, rt, pre_mz, pre_int, pre_z, peaks) - frag_methods = [f[0] for f in frag_groups] if "etd" in frag_methods: if "cid" in frag_methods: - ordered_ms2_spectra['ETciD'].append(ms2class_spectrum) + frag_method = "ETciD" elif "hcd" in frag_methods: - 
ordered_ms2_spectra['EThcD'].append(ms2class_spectrum) + frag_method = "EThcD" else: - ordered_ms2_spectra['ETD'].append(ms2class_spectrum) + frag_method = "ETD" elif "cid" in frag_methods: - ordered_ms2_spectra['CID'].append(ms2class_spectrum) + frag_method = "CID" elif "hcd" in frag_methods: - ordered_ms2_spectra['HCD'].append(ms2class_spectrum) + frag_method = "HCD" else: - ordered_ms2_spectra['unknown'].append(ms2class_spectrum) - if len(ordered_ms2_spectra['unknown']) > 0: - raise Warning("The fragmentation method of %i spectra could not be identified" % len(ordered_ms2_spectra['unknown'])) + frag_method = 'unknown' + unknown_frag_method_count += 1 + + ms2class_spectrum = ProteoFileReader.MS2_spectrum( + title, + rt, + pre_mz, + pre_int, + pre_z, + peaks, + detector=detector_str, + fragmethod=frag_method + ) + + sorted_ms2_spectra.append(ms2class_spectrum) - return {k: v for k, v in ordered_ms2_spectra.items() if len(v) > 0} + if unknown_frag_method_count > 0: + raise Warning("The fragmentation method of %i spectra could not be identified" % unknown_frag_method_count) + + return sorted_ms2_spectra def generate_cihcd_spectra(mzml_file): @@ -160,11 +173,11 @@ def generate_cihcd_spectra(mzml_file): frag_groups = re.findall("@([A-z]+)([0-9.]+)", filter_str) precursor_mz_groups = re.findall("([0-9.]+)@", filter_str) except AttributeError: - raise StandardError("filter string parse error: %s" % filter_str) + raise Exception("filter string parse error: %s" % filter_str) try: ms2_id = spectrum['precursorList']['precursor'][0]['spectrumRef'] except KeyError: - ms2_id = '' # TODO why Key ERror + ms2_id = '' # TODO why Key Error title = os.path.split(mzml_file)[1].split('.mzML')[0] + " " + spectrum['id'] + " ms2_scanId=" + ms2_id rt = spectrum['scanList']['scan'][0]['scan start time'] * 60 @@ -199,55 +212,7 @@ def mscon_cmd(filepath, outdir, settings, mgf): return cmd_list -def write_mgf(spectra, outfile): - out_writer = open(os.path.join(outfile), "w") - for spectrum in spectra: - scan = re.search('scan=[0-9]*', spectrum.getTitle()).group(0)[5:] - # title = spectrum.getTitle() - try: - title = re.match('(B|E)[0-9]{6}_[0-9]{2}.+?( )', spectrum.getTitle()).group(0)[:-1] - except AttributeError: - title = re.match('[0-9]{8}_[0-9]{2}.+?( )', spectrum.getTitle()).group(0)[:-1] - title = '.'.join([title, scan, scan, str(int(spectrum.charge))]) - if 'ms2_scanId' in spectrum.getTitle(): - try: - ms2_parent = re.search('ms2_scanId=.*scan=([0-9]+)', spectrum.getTitle()).groups()[0] - except AttributeError: - ms2_parent = 0 - title += ' ms2_scanId=%s' % ms2_parent - if sys.version_info.major < 3: - stavrox_mgf = """ -MASS=Monoisotopic -BEGIN IONS -TITLE={} -PEPMASS={} {} -CHARGE={}+ -RTINSECONDS={} -{} -END IONS """.format(title, - spectrum.getPrecursorMass(), - spectrum.getPrecursorIntensity() if spectrum.getPrecursorIntensity() > 0 else 0, - int(spectrum.charge), spectrum.getRT(), - "\n".join(["%s %s" % (i[0], i[1]) for i in spectrum.peaks if i[1] > 0])) - else: - stavrox_mgf = """ -MASS=Monoisotopic -BEGIN IONS -TITLE={} -PEPMASS={} {} -CHARGE={}+ -RTINSECONDS={} -{} -END IONS """.format(title, - spectrum.getPrecursorMass(), - spectrum.getPrecursorIntensity() if spectrum.getPrecursorIntensity() > 0 else 0, - int(spectrum.charge), spectrum.getRT(), - "\n".join(["%s %s" % (mz, spectrum.peaks[1][i]) for i, mz in enumerate(spectrum.peaks[0]) if - spectrum.peaks[1][i] > 0])) - out_writer.write(stavrox_mgf) - - -def process_file(filepath, outdir, mscon_settings, split_acq, detector_filter, mscon_exe, 
cihcd_ms3=True): #TODO implement option further up +def process_file(filepath, outdir, mscon_settings, split_acq, detector_filter, mscon_exe, cihcd_ms3=False): #TODO implement option further up if not os.path.exists(outdir): os.makedirs(outdir) @@ -262,19 +227,20 @@ def process_file(filepath, outdir, mscon_settings, split_acq, detector_filter, m if cihcd_ms3: cihcd_spectra = generate_cihcd_spectra(mzml_file) - write_mgf(spectra=cihcd_spectra, outfile=os.path.join(outdir, 'CIhcD_ms3_' + filename[:filename.rfind('.')] + '.mgf')) + ProteoFileReader.write_mgf(spectra=cihcd_spectra, outfile=os.path.join(outdir, 'CIhcD_ms3_' + filename[:filename.rfind('.')] + '.mgf')) if split_acq: - splitted_spectra = split_mzml(mzml_file, detector_filter) + split_spectra = mzml_to_MS2_spectra(mzml_file, detector_filter) - for acq in splitted_spectra: - write_mgf(spectra=splitted_spectra[acq], - outfile=os.path.join(outdir, acq + '_' + filename[:filename.rfind('.')]+'.mgf')) + ProteoFileReader.write_mgf( + spectra=split_spectra, + outfile=os.path.join(outdir, filename[:filename.rfind('.')]+'.mgf') + ) if __name__ == '__main__': - # read cmdline arguments / get deafult values - input_arg, outdir, config_path, recal_conf, recal = read_cmdline() + # read cmdline arguments / get default values + input_arg, outdir, config_path, recal_conf, recal, ms2recal = read_cmdline() try: execfile(config_path) except NameError: @@ -300,24 +266,29 @@ def process_file(filepath, outdir, mscon_settings, split_acq, detector_filter, m pool.close() pool.join() - recal_in = [os.path.join(outdir, x) for x in os.listdir(outdir) if '.mgf' in x] + mgf_file_list = [os.path.join(outdir, x) for x in os.listdir(outdir) if '.mgf' in x] if recal: - # pool = Pool(processes=nthr) + if not os.path.exists(outdir): os.makedirs(outdir) - output = zipfile.ZipFile(outdir + '/recalibrated_files.zip', 'w', zipfile.ZIP_DEFLATED) + # TODO change to parallel with manual input of error - for inputfile in recal_in: + for inputfile in mgf_file_list: if 'ms3' in os.path.split(inputfile)[1]: continue - mass_recal.main(fasta=recal_conf['db'], xi_cnf=recal_conf['xiconf'], outpath=outdir, - mgf=inputfile, threads=str(nthr), - val_input=recal_conf['shift_csv'] #'D:/user/Swantje/dsso_ot_it_error/raw/processed_together/ms1_err.csv' - ) - # val_input='//130.149.167.198/rappsilbergroup/users/lswantje/DSSO_prepro/xlinkx/processed_wosplit/ms1_err.csv' - output.write(os.path.join(outdir, 'recal_' + os.path.split(inputfile)[1]), - arcname='recal_' + os.path.split(inputfile)[1]) - # pool.map(partial(mass_recal.main, fasta=database, xi_cnf=xi_recal_config, outpath=outdir + '/recal', - # xi_jar=xi_offline), recal_in) - # pool.close() - # pool.join() + if ms2recal: + mass_recal_ms2.main(fasta=recal_conf['db'], xi_cnf=recal_conf['xiconf'], outpath=outdir, + mgf=inputfile, threads=str(nthr), + val_input=recal_conf['shift_csv'] + ) + else: + mass_recal.main(fasta=recal_conf['db'], xi_cnf=recal_conf['xiconf'], outpath=outdir, + mgf=inputfile, threads=str(nthr), + val_input=recal_conf['shift_csv'] + ) + + mgf_file_list = [os.path.join(os.path.split(x)[0], 'recal_' + os.path.split(x)[1]) for x in mgf_file_list] + + if split_acq: + for mgf_file in mgf_file_list: + ProteoFileReader.split_mgf_methods(mgf_file) diff --git a/requirements.txt b/requirements.txt deleted file mode 100644 index 8dc241c..0000000 --- a/requirements.txt +++ /dev/null @@ -1,16 +0,0 @@ -backports.functools-lru-cache==1.5 -cycler==0.10.0 -kiwisolver==1.0.1 -lxml==4.2.3 -matplotlib==2.2.2 -numpy==1.14.5 
-pandas==0.23.3
-pyopenms==2.3.0.4
-pyparsing==2.2.0
-pyteomics==3.5.1
-python-dateutil==2.7.3
-pytz==2018.5
-scipy==1.1.0
-seaborn==0.9.0
-six==1.11.0
-subprocess32==3.5.2
diff --git a/resources/XiSearch_1.6.739.jar b/resources/XiSearch_1.6.739.jar
deleted file mode 100755
index 86ff068..0000000
Binary files a/resources/XiSearch_1.6.739.jar and /dev/null differ
diff --git a/resources/XiSearch_1.6.745.jar b/resources/XiSearch_1.6.745.jar
new file mode 100755
index 0000000..263b0f3
Binary files /dev/null and b/resources/XiSearch_1.6.745.jar differ
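
For reference, a minimal usage sketch of the MGF helpers this patch adds to ProteoFileReader.py (read_mgf, write_mgf, split_mgf_methods). It is illustrative only and not part of the patch: the file paths and the 3.2 ppm value are placeholder assumptions, and the precursor correction simply mirrors adjust_prec_mz() in mass_recal.py.

# Sketch only, assuming the patched ProteoFileReader module is importable;
# the paths and the ppm shift below are hypothetical example values.
import ProteoFileReader

# parse an MGF into MS2_spectrum objects (precursor m/z, RT, charge, peaks,
# plus the DETECTOR=/FRAGMETHOD= headers that write_mgf emits)
spectra = ProteoFileReader.read_mgf("example_run.mgf")

ppm_error = 3.2  # e.g. the median MS1 error reported by the linear Xi search
for spectrum in spectra:
    # same precursor m/z correction as adjust_prec_mz() in mass_recal.py
    spectrum.pepmz = spectrum.getPrecursorMZ() / (1 - ppm_error / 10.0 ** 6)

ProteoFileReader.write_mgf(spectra, "recal_example_run.mgf")

# split the recalibrated file into one MGF per fragmentation method
# (CID_/HCD_/ETD_/ETciD_/EThcD_ file prefixes); methods without spectra are skipped
ProteoFileReader.split_mgf_methods("recal_example_run.mgf")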