From cf9984f92dd5cf93ad50ac23574b2d0c8cc661c4 Mon Sep 17 00:00:00 2001 From: WangK2 Date: Fri, 3 May 2024 00:09:43 +0800 Subject: [PATCH] Rewrite cssci rules --- README.md | 19 +- refparse/__init__.py | 2 +- refparse/cssci.py | 250 ++++++++----------------- refparse/scopus.py | 4 +- tests/test_cssci.py | 424 +++++++++++++------------------------------ 5 files changed, 214 insertions(+), 485 deletions(-) diff --git a/README.md b/README.md index 6337694..3a79561 100644 --- a/README.md +++ b/README.md @@ -27,16 +27,13 @@ $ pip install refparse ## Return Fields -| | Web of Science | Scopus | CSSCI* | +| | Web of Science | Scopus | CSSCI | | :---: | :---: | :---: | :---: | -| author | ✓ | ✓ | | -| title | | ✓ | | -| source | ✓ | ✓ | | -| volume | ✓ | ✓ | | -| issue | | ✓ | | -| page | ✓ | ✓ | | -| year | ✓ | ✓ | | +| author | ✓ | ✓ | ✓ | +| title | | ✓ | ✓ | +| source | ✓ | ✓ | ✓ | +| volume | ✓ | ✓ | ✓ | +| issue | | ✓ | ✓ | +| page | ✓ | ✓ | ✓ | +| year | ✓ | ✓ | ✓ | | doi | ✓ | | | -| identifier except doi | | | ✓ | - -\* CSSCI will return differently depending on reference type. diff --git a/refparse/__init__.py b/refparse/__init__.py index a5413c3..85039d2 100644 --- a/refparse/__init__.py +++ b/refparse/__init__.py @@ -1,4 +1,4 @@ -__version__ = "0.4.1" +__version__ = "0.5.0" from typing import Literal, Optional from .wos import ParseWos diff --git a/refparse/cssci.py b/refparse/cssci.py index 9ec9147..9decbfd 100644 --- a/refparse/cssci.py +++ b/refparse/cssci.py @@ -4,202 +4,104 @@ class ParseCssci: def __init__(self, ref: str): - self.ref = self.clean(ref) - self.dot_count = self.ref.count(".") + self.ref = ref @staticmethod def clean(ref: str) -> str: - return re.sub(r"^\d*\.", "", ref) + ref = ref.strip(".") + ref = re.sub(r"^\d*\.", "", ref) + # Remove unwanted info from newspaper ref + # e.g. 3.郑晋鸣.南京城东五高校建图书馆联合体.光明日报.04.24(7) + if re.search(r"\.\d{1,2}\.\d{1,2}(?=\(|$)", ref): + ref = re.split(r"\.(?=\d+\.)", ref, 1)[0] + return ref - def parse(self): - # Web resource - if re.search(r"\.\d{4}$", self.ref): - return self.parse_web() - - if re.search(r"[\u4e00-\u9fa5]", self.ref): - if "GB/" in self.ref: - return self.parse_standard() - - elif self.ref[-3:] == "出版社": - return self.parse_book() - - elif ":学位论文." in self.ref: - return self.parse_thesis() - - # Newspaper - elif re.search(r"[^\d]\.\d{1,2}\.\d{1,2}", self.ref): - return self.parse_newspaper() - - # Patent 1 - elif re.search(r"\.CN\d{9}[A-Z]$", self.ref): - return self.parse_patent1() - - # Patent 2 - elif re.search(r"^一种", self.ref): - return self.parse_patent2() + @staticmethod + def drop(ref: str) -> Optional[str]: + if re.search(r"^\d+\.\d", ref): + return None - else: - return self.parse_paper() - else: - return self.parse_english() + elif re.search(r"^\d+\.\.$", ref): + return None - def parse_web(self) -> dict[str, Optional[str]]: - if self.dot_count == 2: - author, title, year = self.ref.split(".") - elif self.dot_count > 2: - author = self.ref.split(".", 1)[0] - year = self.ref.rsplit(".", 1)[1] - title = self.ref.replace(author + ".", "").replace("." + year, "") - if author == "": - author = None - return {"type": "web", "author": author, "title": title, "year": year} + # e.g. 2..Campbell v. Acuff-Rose Music, Inc., 510 U. S. 569 (1994),1994 + elif re.search(r"\d{4}\),\d{4}", ref) and re.search(r"^\d+\.\.", ref): + return None - def parse_standard(self) -> dict[str, Optional[str]]: - if "出版社" in self.ref: - year = None - if self.dot_count == 2: - author, title, source = self.ref.split(".") - elif self.dot_count > 2: - author = self.ref.split(".", 1)[0] - source = self.ref.rsplit(".", 1)[1] - title = self.ref.split(".", 1)[1].replace("." + source, "") - else: - source = None - author = self.ref.split(".", 1)[0] - if re.search(r",\d{4}$", self.ref): - year = self.ref[-4:] - title = self.ref.split(".", 1)[1].replace("," + year, "") - else: - year = None - title = self.ref.split(".", 1)[1] + # Drop patent + # e.g. 26.图书上下架机器人.CN102152293A + elif re.search(r"\.CN\d{9}[A-Z]$", ref): + return None - if author == "": - author = None - if title.startswith("GB/"): - identifier, title = re.split(r"[,,] ?", title, 1) + # e.g. 9.一种基于RFID技术的自动式图书智能盘点机器人:201620075212.0.2016-01-25 + elif re.search(r"^\d+\.一种", ref): + return None else: - title, identifier = re.split(r":(?=GB)", title, 1) - return { - "type": "standard", - "author": author, - "title": title, - "source": source, - "year": year, - "identifier": identifier, - } + return ref - def parse_book(self) -> dict[str, Optional[str]]: - author, title, source = self.ref.split(".") - return {"type": "book", "author": author, "title": title, "source": source} + def extract(self, pattern: str, ref: Optional[str] = None, flags=0) -> Optional[str]: + if not ref: + ref = self.ref + match = re.search(pattern, ref, flags) + return match.group(1) if match else None - def parse_thesis(self) -> dict[str, Optional[str]]: - author, title, other = self.ref.split(".", 2) - title = title[:-5] - source, year = other.split(",") - year = year if len(year) == 4 else year[:4] - return {"type": "thesis", "author": author, "title": title, "source": source, "year": year} + def extract_author(self) -> Optional[str]: + if re.search(r"[A-Z]\.\.", self.ref): + author = self.extract(r"^(.*\.)\.") - def parse_newspaper(self) -> dict[str, Optional[str]]: - author, title, source, date = self.ref.split(".", 3) - if author == "": + elif re.search(r"^\.", self.ref): author = None - date = date.split("(", 1)[0] - return {"type": "newspaper", "author": author, "title": title, "source": source, "date": date} - - def parse_patent1(self) -> dict[str, Optional[str]]: - title, identifier = self.ref.split(".", 1) - return {"type": "patent", "title": title, "identifier": identifier} - def parse_patent2(self) -> dict[str, Optional[str]]: - title, other = self.ref.split(":", 1) - identifier = other.rsplit(".", 1)[0] - identifier = re.sub(r"^[^\d]*(?=\d)", "", identifier) - return {"type": "patent", "title": title, "identifier": identifier} - - def parse_paper(self) -> dict[str, Optional[str]]: - author, title, source, year, volume_issue = self.ref.split(".", 4) - if volume_issue.startswith("("): - volume = None - issue = volume_issue.strip("()") else: - volume, issue = volume_issue.split("(") - issue = issue.strip(")") - return { - "type": "paper", - "author": author, - "title": title, - "source": source, - "year": year, - "volume": volume, - "issue": issue, - } - - def parse_english(self) -> dict[str, Optional[str]]: - def split_author_title(ref_left: str): - if ".." in self.ref: - author, title = ref_left.split("..", 1) - author += "." - + author = self.ref.split(".", 1)[0] + return author + + def parse(self) -> Optional[dict[str, Optional[str]]]: + if self.drop(self.ref): + self.ref = self.clean(self.ref) + dot_count = self.ref.count(".") + if dot_count == 0: + return { + "author": None, + "title": self.ref, + "source": None, + "year": None, + "volume": None, + "issue": None, + } + + volume, issue, page = None, None, None + year = self.extract(r"[\.,](\d{4})\b") + if year: + volume = self.extract(r"[\.,]\d{4}\.(\d+)\b") + issue = self.extract(r"\((\d+)\)$") + if not (volume and issue): + # e.g. 22.邓万云.利用Internet开拓我州科技信息服务新领域,2006:204-208 + page = self.extract(r":([\d-]+)$") + + author = self.extract_author() + # 1..2021年度江苏省公共图书馆大数据统计报告 + ref_left = re.split(r"[\.,](?=\d{4}\b)", self.ref, 1)[0] + if author: + ref_left = ref_left.replace(author + ".", "", 1) else: - dot_count = ref_left.count(".") - if dot_count == 1: - author, title = ref_left.split(".", 1) - - elif dot_count > 1: - author, title = ref_left.rsplit(".", 1) - else: - author, title = None, None - return author, title - - # English book - if re.search(r":[A-Z]", self.ref): + ref_left = ref_left[1:] try: - ref_left, year_page = re.split(r",(?=\d{4})", self.ref, 1) - except ValueError: - ref_left = self.ref - year = None - page = None + title, source = ref_left.rsplit(".", 1) + except: + title = ref_left + source = None else: - year = year_page[:4] - page = year_page[5:] - - if re.search(r"\.[A-Z][A-Za-z()]+:", ref_left): - ref_left, source = ref_left.rsplit(".", 1) - - elif re.search(r"\.(?:[A-Z][A-Za-z]+ ){1,2}[A-Z][A-Za-z]+:[A-Z]", ref_left): - ref_left, source = ref_left.rsplit(".", 1) - - else: - ref_left, source = ref_left.rsplit(":", 1) - - author, title = split_author_title(ref_left) + # e.g. 4..Society 5.0——科学技术政策——内阁府.2020 + if re.search(r"^\d", source): + title = title + "." + source + source = None return { - "type": "english-book", "author": author, "title": title, "source": source, "year": year, + "volume": volume, + "issue": issue, "page": page, } - - # English paper - ref_left, year_volume_issue = re.split(r"\.(?=\d{4})", self.ref, 1) - year = year_volume_issue.split(".", 1)[0] - issue = year_volume_issue.split("(", 1)[1].strip(")") - if ".(" in year_volume_issue: - volume = None - else: - volume = year_volume_issue.split("(", 1)[0][5:] - - ref_left, source = ref_left.rsplit(".", 1) - author, title = split_author_title(ref_left) - - return { - "type": "english-paper", - "author": author, - "title": title, - "source": source, - "year": year, - "volume": volume, - "issue": issue, - } diff --git a/refparse/scopus.py b/refparse/scopus.py index 653f4bb..5fc10a2 100644 --- a/refparse/scopus.py +++ b/refparse/scopus.py @@ -46,7 +46,7 @@ def clean(ref: str) -> str: # Add page symbol if re.search(r", \d+-\d+, \(", ref): if not re.search(r", \d{4}-\d{4}, ", ref): - match = re.search(r", (\d+-\d+), ", ref).group(1) + match = re.search(r", (\d+-\d+), ", ref).group(1) # type: ignore if re.search(r"\d, \d+-", ref): a, b = (int(i) for i in match.split("-")) # Exclude possible issue @@ -201,7 +201,7 @@ def parse_general(self) -> dict[str, Optional[str]]: if title == "unknown": # Remove other fields info if source: - repr_str = re.match(r"([A-Za-z\d\. ]{,20})", source)[1] + repr_str = re.match(r"([A-Za-z\d\. ]{,20})", source).group(1) # type: ignore ref_left = re.sub(f", {repr_str}.*$", "", self.ref) elif volume: ref_left = re.sub(f", {volume}.*$", "", self.ref) diff --git a/tests/test_cssci.py b/tests/test_cssci.py index 4aaa485..f1661de 100644 --- a/tests/test_cssci.py +++ b/tests/test_cssci.py @@ -3,364 +3,228 @@ from refparse.cssci import ParseCssci test_clean_data = [ - ( - "1.康德.纯粹理性批判.北京:人民出版社", - "康德.纯粹理性批判.北京:人民出版社", - ), ( "7..中华人民共和国公共图书馆法.2021", ".中华人民共和国公共图书馆法.2021", ), + ( + "1.段美珍.智慧图书馆的内涵特点及其认知模型研究.图书情报工作.", + "段美珍.智慧图书馆的内涵特点及其认知模型研究.图书情报工作", + ), + ( + "3.郑晋鸣.南京城东五高校建图书馆联合体.光明日报.04.24(7)", + "郑晋鸣.南京城东五高校建图书馆联合体.光明日报", + ), ] -test_web_data = [ +test_drop_data = [ ( - "8.Google.Analytics.js.2021", - { - "type": "web", - "author": "Google", - "title": "Analytics.js", - "year": "2021", - }, + "1.17 U. S. C. § 107", + None, ), ( - "9..CNNIC:微博用户达2.5亿,近半数网民使用.2012", - { - "type": "web", - "author": None, - "title": "CNNIC:微博用户达25亿,近半数网民使用2012", - "year": "2012", - }, + "1..", + None, ), ( - "22.IFLA.IFLA STRATEGY 2019-2024.2019", - { - "type": "web", - "author": "IFLA", - "title": "IFLA STRATEGY 2019-2024", - "year": "2019", - }, + "2..Campbell v. Acuff-Rose Music, Inc., 510 U. S. 569 (1994),1994", + None, ), -] - -test_standard_data = [ ( - "8..GB/T37043-2018,智慧城市术语.北京:中国标准出版社", - { - "type": "standard", - "author": None, - "title": "智慧城市术语", - "source": "北京:中国标准出版社", - "year": None, - "identifier": "GB/T37043-2018", - }, + "26.图书上下架机器人.CN102152293A", + None, ), ( - "17.全国信息技术标准化技术委员会教育技术分会.GB/T 36342-2018,智慧校园总体框架,2018", - { - "type": "standard", - "author": "全国信息技术标准化技术委员会教育技术分会", - "title": "智慧校园总体框架", - "source": None, - "year": "2018", - "identifier": "GB/T 36342-2018", - }, + "9.一种基于RFID技术的自动式图书智能盘点机器人:201620075212.0.2016-01-25", + None, ), ( - "30.全国信息技术标准化技术委员会.智慧城市,数据融合,第5部分:市政基础设施数据元素:GB/T 36625.5-2019.北京:中国标准出版社", - { - "type": "standard", - "author": "全国信息技术标准化技术委员会", - "title": "智慧城市,数据融合,第5部分:市政基础设施数据元素", - "source": "北京:中国标准出版社", - "year": None, - "identifier": "GB/T 36625.5-2019", - }, + "19.皮亚杰.儿童心理学.北京:商务印书馆", + "19.皮亚杰.儿童心理学.北京:商务印书馆", ), ] -test_book_data = [ +test_author_data = [ ( - "14.吴建中.21世纪图书馆新论.上海:上海科学技术文献出版社", - { - "type": "book", - "author": "吴建中", - "title": "21世纪图书馆新论", - "source": "上海:上海科学技术文献出版社", - }, + "25.Remy,M..Information Literacy: The Information Commons Connection.California,2004", + "Remy,M.", ), ( - "3.金元浦.中国文化概论.北京:首都师范大学出版社", - { - "type": "book", - "author": "金元浦", - "title": "中国文化概论", - "source": "北京:首都师范大学出版社", - }, + "1..2021年度江苏省公共图书馆大数据统计报告", + None, + ), + ( + "14.陈大庆.FOLIO在深圳大学,2018", + "陈大庆", ), ] -test_thesis_data = [ +test_parse_data = [ ( - "21.郑怿昕.智慧图书馆环境下馆员核心能力研究:学位论文.南京:南京农业大学,2015:27-31", + "5.北京市第一中级人民法院民事判决书(2011)一中民初字第1321号", { - "type": "thesis", - "author": "郑怿昕", - "title": "智慧图书馆环境下馆员核心能力研究", - "source": "南京:南京农业大学", - "year": "2015", + "author": None, + "title": "北京市第一中级人民法院民事判决书(2011)一中民初字第1321号", + "source": None, + "year": None, + "volume": None, + "issue": None, }, ), ( - "17.段美珍.智慧图书馆建设评价模型与应用研究:学位论文.北京:中国科学院大学,2020", + "10..GB/T 35273-2020,信息安全技术个人信息安全规范", { - "type": "thesis", - "author": "段美珍", - "title": "智慧图书馆建设评价模型与应用研究", - "source": "北京:中国科学院大学", - "year": "2020", + "author": None, + "title": "GB/T 35273-2020,信息安全技术个人信息安全规范", + "source": None, + "year": None, + "volume": None, + "issue": None, + "page": None, }, ), -] - -test_newspaper_data = [ ( "6..习近平在第二届世界互联网大会开幕式上的讲话.人民日报.12.17(2)", { - "type": "newspaper", "author": None, "title": "习近平在第二届世界互联网大会开幕式上的讲话", "source": "人民日报", - "date": "12.17", + "year": None, + "volume": None, + "issue": None, + "page": None, }, ), ( - "25.曹磊.大数据:数字世界的智慧基因.文汇报.11.8(12)", + "21.郑怿昕.智慧图书馆环境下馆员核心能力研究:学位论文.南京:南京农业大学,2015:27-31", { - "type": "newspaper", - "author": "曹磊", - "title": "大数据:数字世界的智慧基因", - "source": "文汇报", - "date": "11.8", + "author": "郑怿昕", + "title": "智慧图书馆环境下馆员核心能力研究:学位论文", + "source": "南京:南京农业大学", + "year": "2015", + "volume": None, + "issue": None, + "page": "27-31", }, ), ( - "65..图书馆来了机器人管理员.宁波日报.1.8", + "9..CNNIC:微博用户达2.5亿,近半数网民使用.2012", { - "type": "newspaper", "author": None, - "title": "图书馆来了机器人管理员", - "source": "宁波日报", - "date": "1.8", + "title": "CNNIC:微博用户达2.5亿,近半数网民使用", + "source": None, + "year": "2012", + "volume": None, + "issue": None, + "page": None, }, ), -] - -test_patent1_data = [ ( - "26.图书上下架机器人.CN102152293A", + "4..Society 5.0——科学技术政策——内阁府.2020", { - "type": "patent", - "title": "图书上下架机器人", - "identifier": "CN102152293A", + "author": None, + "title": "Society 5.0——科学技术政策——内阁府", + "source": None, + "year": "2020", + "volume": None, + "issue": None, + "page": None, }, - ) -] -test_patent2_data = [ + ), ( - "9.一种基于RFID技术的自动式图书智能盘点机器人:201620075212.0.2016-01-25", + "5.杨新涯.2.0的图书馆.广州:中山大学出版社", { - "type": "patent", - "title": "一种基于RFID技术的自动式图书智能盘点机器人", - "identifier": "201620075212.0", + "author": "杨新涯", + "title": "2.0的图书馆", + "source": "广州:中山大学出版社", + "year": None, + "volume": None, + "issue": None, + "page": None, }, ), ( - "39.一种基于区块链的金融安全存证平台系统及方法:中国,201910838935. X(2019-09-05)", + "9.全国人民代表大会常务委员会.中华人民共和国个人信息保护法,2021", { - "type": "patent", - "title": "一种基于区块链的金融安全存证平台系统及方法", - "identifier": "201910838935", + "author": "全国人民代表大会常务委员会", + "title": "中华人民共和国个人信息保护法", + "source": None, + "year": "2021", + "volume": None, + "issue": None, + "page": None, }, ), -] - - -test_paper_data = [ ( - "2.严栋.基于物联网的智慧图书馆.图书馆学刊.2010.32(7)", + "1.马费成.图书情报学与元宇宙:共识共创共进", { - "type": "paper", - "author": "严栋", - "title": "基于物联网的智慧图书馆", - "source": "图书馆学刊", - "year": "2010", - "volume": "32", - "issue": "7", + "author": "马费成", + "title": "图书情报学与元宇宙:共识共创共进", + "source": None, + "year": None, + "volume": None, + "issue": None, + "page": None, }, ), ( "39.刘炜.5G与智慧图书馆建设.中国图书馆学报.2019.45(5)", { - "type": "paper", "author": "刘炜", "title": "5G与智慧图书馆建设", "source": "中国图书馆学报", "year": "2019", "volume": "45", "issue": "5", - }, - ), -] - -test_english_data = [ - ( - "20.Alexei,P..Rite of passage.USA:Galaxy Publishing Co", - { - "type": "english-book", - "author": "Alexei,P.", - "title": "Rite of passage", - "source": "USA:Galaxy Publishing Co", - "year": None, "page": None, }, ), - ( - "8.Sohail,S S.Book recommendation system using opinion mining technique:IEEE,2013:1609-1614", - { - "type": "english-book", - "author": "Sohail,S S", - "title": "Book recommendation system using opinion mining technique", - "source": "IEEE", - "year": "2013", - "page": "1609-1614", - }, - ), ( "7.Vaz,P C.Improving a hybrid literary book recommendation system through author ranking.New York:Association for Computing Machinery,2012:387-388", { - "type": "english-book", "author": "Vaz,P C", "title": "Improving a hybrid literary book recommendation system through author ranking", "source": "New York:Association for Computing Machinery", "year": "2012", + "volume": None, + "issue": None, "page": "387-388", }, ), ( - "7.Hunzaker,M.B. Fallin.Mapping Cultural Schemas: From Theory to Method.American Sociological Review.2019.84(5)", + "22.IFLA.IFLA STRATEGY 2019-2024.2019", { - "type": "english-paper", - "author": "Hunzaker,M.B. Fallin", - "title": "Mapping Cultural Schemas: From Theory to Method", - "source": "American Sociological Review", + "author": "IFLA", + "title": "IFLA STRATEGY 2019-2024", + "source": None, "year": "2019", - "volume": "84", - "issue": "5", - }, - ), - ( - "1.Aittola,M..Smart Library: Location-Aware Mobile Library Service.International Symposium on Human Computer Interaction with Mobile Devices and Services.2003.(5)", - { - "type": "english-paper", - "author": "Aittola,M.", - "title": "Smart Library: Location-Aware Mobile Library Service", - "source": "International Symposium on Human Computer Interaction with Mobile Devices and Services", - "year": "2003", "volume": None, - "issue": "5", + "issue": None, + "page": None, }, ), -] - -test_parse_data = [ ( - "3..2021第五届中国未来智慧图书馆发展论坛.2021", + "1.Lu,Y..Digital Twin-driven smart manufacturing: Connotation, reference model, applications and research issues.Robotics and Computer-Integrated Manufacturing.2020.61", { - "type": "web", - "author": None, - "title": "2021第五届中国未来智慧图书馆发展论坛", - "year": "2021", + "author": "Lu,Y.", + "title": "Digital Twin-driven smart manufacturing: Connotation, reference model, applications and research issues", + "source": "Robotics and Computer-Integrated Manufacturing", + "year": "2020", + "volume": "61", + "issue": None, + "page": None, }, ), ( - "10..GB/T 35273-2020,信息安全技术个人信息安全规范", + "14.Hufflen,J M.Languages for Bibliography Styles.TUGB", { - "type": "standard", - "author": None, - "title": "信息安全技术个人信息安全规范", - "source": None, + "author": "Hufflen,J M", + "title": "Languages for Bibliography Styles", + "source": "TUGB", "year": None, - "identifier": "GB/T 35273-2020", - }, - ), - ( - "14.吴慰慈.图书馆学概论.北京:北京图书馆出版社", - { - "type": "book", - "author": "吴慰慈", - "title": "图书馆学概论", - "source": "北京:北京图书馆出版社", - }, - ), - ( - "37.潘星.智慧图书馆联盟建设策略研究:学位论文.扬州:扬州大学,2021", - { - "type": "thesis", - "author": "潘星", - "title": "智慧图书馆联盟建设策略研究", - "source": "扬州:扬州大学", - "year": "2021", - }, - ), - ( - "12.王伟健.一个来了还想再来的图书馆.人民日报.1.7(11)", - { - "type": "newspaper", - "author": "王伟健", - "title": "一个来了还想再来的图书馆", - "source": "人民日报", - "date": "1.7", - }, - ), - ( - "25.一种用于图书馆机器人的书本上架装置.CN202880264U", - { - "type": "patent", - "title": "一种用于图书馆机器人的书本上架装置", - "identifier": "CN202880264U", - }, - ), - ( - "10.一种基于RFID标签RSSI信号值的图书排序方法:201610050963.1.2016-01-25", - { - "type": "patent", - "title": "一种基于RFID标签RSSI信号值的图书排序方法", - "identifier": "201610050963.1", - }, - ), - ( - "33.司莉.科学数据的标准规范体系框架研究.图书馆.2016.(5)", - { - "type": "paper", - "author": "司莉", - "title": "科学数据的标准规范体系框架研究", - "source": "图书馆", - "year": "2016", "volume": None, - "issue": "5", - }, - ), - ( - "18.Ahirwar,J..Five Laws of Library Science and Information Economics.Informatics Studies.2021.7(1)", - { - "type": "english-paper", - "author": "Ahirwar,J.", - "title": "Five Laws of Library Science and Information Economics", - "source": "Informatics Studies", - "year": "2021", - "volume": "7", - "issue": "1", + "issue": None, + "page": None, }, ), ] @@ -371,49 +235,15 @@ def test_clean(input, expected): assert ParseCssci.clean(input) == expected -@pytest.mark.parametrize("input, expected", test_web_data) -def test_parse_web(input, expected): - assert ParseCssci(input).parse_web() == expected - - -@pytest.mark.parametrize("input, expected", test_standard_data) -def test_parse_standard(input, expected): - assert ParseCssci(input).parse_standard() == expected - - -@pytest.mark.parametrize("input, expected", test_book_data) -def test_parse_book(input, expected): - assert ParseCssci(input).parse_book() == expected - - -@pytest.mark.parametrize("input, expected", test_thesis_data) -def test_parse_thesis(input, expected): - assert ParseCssci(input).parse_thesis() == expected - - -@pytest.mark.parametrize("input, expected", test_newspaper_data) -def test_parse_newspaper(input, expected): - assert ParseCssci(input).parse_newspaper() == expected - - -@pytest.mark.parametrize("input, expected", test_patent1_data) -def test_parse_patent1(input, expected): - assert ParseCssci(input).parse_patent1() == expected - - -@pytest.mark.parametrize("input, expected", test_patent2_data) -def test_parse_patent2(input, expected): - assert ParseCssci(input).parse_patent2() == expected - - -@pytest.mark.parametrize("input, expected", test_paper_data) -def test_parse_paper(input, expected): - assert ParseCssci(input).parse_paper() == expected +@pytest.mark.parametrize("input, expected", test_drop_data) +def test_drop(input, expected): + assert ParseCssci.drop(input) == expected -@pytest.mark.parametrize("input, expected", test_english_data) -def test_parse_english(input, expected): - assert ParseCssci(input).parse_english() == expected +@pytest.mark.parametrize("input, expected", test_author_data) +def test_extract_author(input, expected): + ref = ParseCssci.clean(input) + assert ParseCssci(ref).extract_author() == expected @pytest.mark.parametrize("input, expected", test_parse_data)