forked from HugoBlox/theme-academic-cv
-
Notifications
You must be signed in to change notification settings - Fork 0
/
publications.bib
1115 lines (1018 loc) · 83.2 KB
/
publications.bib
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
%%%%%%%%%% Under review %%%%%%%%%%
% arXiv preprint 1912.08792 (cs.LG): TOCO, tolerance-based neural network compression.
@misc{khoram2019toco,
  author        = {Soroosh Khoram and Jing Li},
  title         = {{TOCO: A Framework for Compressing Neural Network Models Based on Tolerance Analysis}},
  year          = {2019},
  date          = {2019-12-18},
  eprint        = {1912.08792},
  archivePrefix = {arXiv},
  primaryClass  = {cs.LG},
  pubstate      = {preprint},
  url           = {https://arxiv.org/abs/1912.08792},
  keywords      = {whitepaper},
  abstract      = {Neural network compression methods have enabled deploying large models on emerging edge devices with little cost, by adapting already-trained models to the constraints of these devices. The rapid development of AI-capable edge devices with limited computation and storage requires streamlined methodologies that can efficiently satisfy the constraints of different devices. In contrast, existing methods often rely on heuristic and manual adjustments to maintain accuracy, support only coarse compression policies, or target specific device constraints that limit their applicability. We address these limitations by proposing the TOlerance-based COmpression (TOCO) framework. TOCO uses an in-depth analysis of the model, to maintain the accuracy, in an active learning system. The results of the analysis are tolerances that can be used to perform compression in a fine-grained manner. Finally, by decoupling compression from the tolerance analysis, TOCO allows flexibility to changes in the hardware.},
}
% arXiv preprint 1912.08756 (cs.LG): Interleaved Composite Quantization (ICQ).
@misc{khoram2019interleaved,
  author        = {Soroosh Khoram and Stephen J Wright and Jing Li},
  title         = {{Interleaved Composite Quantization for High-Dimensional Similarity Search}},
  year          = {2019},
  date          = {2019-12-18},
  eprint        = {1912.08756},
  archivePrefix = {arXiv},
  primaryClass  = {cs.LG},
  pubstate      = {preprint},
  url           = {https://arxiv.org/abs/1912.08756},
  keywords      = {whitepaper},
  abstract      = {Similarity search retrieves the nearest neighbors of a query vector from a dataset of high-dimensional vectors. As the size of the dataset grows, the cost of performing the distance computations needed to implement a query can become prohibitive. A method often used to reduce this computational cost is quantization of the vector space and location-based encoding of the dataset vectors. These encodings can be used during query processing to find approximate nearest neighbors of the query point quickly. Search speed can be improved by using shorter codes, but shorter codes have higher quantization error, leading to degraded precision. In this work, we propose the Interleaved Composite Quantization (ICQ) which achieves fast similarity search without using shorter codes. In ICQ, a small subset of the code is used to approximate the distances, with complete codes being used only when necessary. Our method effectively reduces both code length and quantization error. Furthermore, ICQ is compatible with several recently proposed techniques for reducing quantization error and can be used in conjunction with these other techniques to improve results. We confirm these claims and show strong empirical performance of ICQ using several synthetic and real-word datasets.},
}
% arXiv preprint 1904.03257 (cs.LG), recorded under the MLSys title.
@misc{alex2019mlsys,
  author        = {Alexander Ratner and Dan Alistarh and Gustavo Alonso and David G. Andersen and Peter Bailis and Sarah Bird and Nicholas Carlini and Bryan Catanzaro and Jennifer Chayes and Eric Chung and Bill Dally and Jeff Dean and Inderjit S. Dhillon and Alexandros Dimakis and Pradeep Dubey and Charles Elkan and Grigori Fursin and Gregory R. Ganger and Lise Getoor and Phillip B. Gibbons and Garth A. Gibson and Joseph E. Gonzalez and Justin Gottschlich and Song Han and Kim Hazelwood and Furong Huang and Martin Jaggi and Kevin Jamieson and Michael I. Jordan and Gauri Joshi and Rania Khalaf and Jason Knight and Jakub Konečný and Tim Kraska and Arun Kumar and Anastasios Kyrillidis and Aparna Lakshmiratan and Jing Li and Samuel Madden and H. Brendan McMahan and Erik Meijer and Ioannis Mitliagkas and Rajat Monga and Derek Murray and Kunle Olukotun and Dimitris Papailiopoulos and Gennady Pekhimenko and Theodoros Rekatsinas and Afshin Rostamizadeh and Christopher Ré and Christopher De Sa and Hanie Sedghi and Siddhartha Sen and Virginia Smith and Alex Smola and Dawn Song and Evan Sparks and Ion Stoica and Vivienne Sze and Madeleine Udell and Joaquin Vanschoren and Shivaram Venkataraman and Rashmi Vinayak and Markus Weimer and Andrew Gordon Wilson and Eric Xing and Matei Zaharia and Ce Zhang and Ameet Talwalkar},
  title         = {{MLSys: The New Frontier of Machine Learning Systems}},
  year          = {2019},
  eprint        = {1904.03257},
  archivePrefix = {arXiv},
  primaryClass  = {cs.LG},
  pubstate      = {preprint},
  keywords      = {whitepaper},
}
% arXiv 1904.03257 recorded under the SysML title (keyword: subpaper).
@article{ratner2019sysml,
  author        = {Ratner, Alexander and Alistarh, Dan and Alonso, Gustavo and Andersen, David G. and Bailis, Peter and Bird, Sarah and Carlini, Nicholas and Catanzaro, Bryan and Chayes, Jennifer and Chung, Eric and Dally, Bill and Dean, Jeff and Dhillon, Inderjit S. and Dimakis, Alexandros and Dubey, Pradeep and Elkan, Charles and Fursin, Grigori and Ganger, Gregory R. and Getoor, Lise and Gibbons, Phillip B. and Gibson, Garth A. and Gonzalez, Joseph E. and Gottschlich, Justin and Han, Song and Hazelwood, Kim and Huang, Furong and Jaggi, Martin and Jamieson, Kevin and Jordan, Michael I. and Joshi, Gauri and Khalaf, Rania and Knight, Jason and Kone{\v{c}}n{\'{y}}, Jakub and Kraska, Tim and Kumar, Arun and Kyrillidis, Anastasios and Lakshmiratan, Aparna and Li, Jing and Madden, Samuel and McMahan, H. Brendan and Meijer, Erik and Mitliagkas, Ioannis and Monga, Rajat and Murray, Derek and Olukotun, Kunle and Papailiopoulos, Dimitris and Pekhimenko, Gennady and Rekatsinas, Theodoros and Rostamizadeh, Afshin and R{\'{e}}, Christopher and {De Sa}, Christopher and Sedghi, Hanie and Sen, Siddhartha and Smith, Virginia and Smola, Alex and Song, Dawn and Sparks, Evan and Stoica, Ion and Sze, Vivienne and Udell, Madeleine and Vanschoren, Joaquin and Venkataraman, Shivaram and Vinayak, Rashmi and Weimer, Markus and Wilson, Andrew Gordon and Xing, Eric and Zaharia, Matei and Zhang, Ce and Talwalkar, Ameet},
  title         = {SysML: The New Frontier of Machine Learning Systems},
  journal       = {arXiv preprint arXiv:1904.03257},
  volume        = {abs/1904.03257},
  year          = {2019},
  month         = {mar},
  date          = {2019-03-29},
  eprint        = {1904.03257},
  archivePrefix = {arXiv},
  url           = {http://arxiv.org/abs/1904.03257},
  note          = {preprint},
  keywords      = {subpaper},
  abstract      = {Machine learning (ML) techniques are enjoying rapidly increasing adoption. However, designing and implementing the systems that support ML models in real-world deployments remains a significant obstacle, in large part due to the radically different development and deployment profile of modern ML methods, and the range of practical concerns that come with broader adoption. We propose to foster a new systems machine learning research community at the intersection of the traditional systems and ML communities, focused on topics such as hardware systems for ML, software systems for ML, and ML optimized for metrics beyond predictive accuracy. To do this, we describe a new conference, SysML, that explicitly targets research at the intersection of systems and machine learning with a program committee split evenly between experts in systems and ML, and an explicit focus on topics at the intersection of the two.},
}
%%%%%%%%%% Refereed Journal %%%%%%%%%%
% Invited JSSC article on the Liquid Silicon processor (pubstate: in print).
@article{zha2020jssc,
  author   = {Zha, Yue and Nowak, Etienne and Li, Jing},
  title    = {{Liquid Silicon}: A Nonvolatile Fully Programmable Processing-In-Memory Processor with Monolithically Integrated {ReRAM} for {Big Data/Machine Learning} Applications (**invited**)},
  journal  = {IEEE Journal of Solid-State Circuits (**JSSC**)},
  year     = {2020},
  pubstate = {in print},
  keywords = {journal},
}
% Frontiers in Neuroscience 12:115 (2018): generalized matrix inverse on spiking neural substrate.
% The year field is declared exactly once; a repeated field triggers BibTeX/Biber warnings.
@article{shukla2018frontiers,
  author   = {Rohit Shukla and Soroosh Khoram and Erik Jorgensen and Jing Li and Mikko Lipasti and Stephen Wright},
  title    = {Computing Generalized Matrix Inverse on Spiking Neural Substrate},
  journal  = {Frontiers in neuroscience: Neuromorphic engineering},
  volume   = {12},
  pages    = {115},
  year     = {2018},
  month    = {Feb},
  date     = {2018-02-14},
  doi      = {10.3389/fnins.2018.00115},
  issn     = {1662-453X},
  keywords = {journal},
  abstract = {Emerging neural hardware substrates, such as IBM's TrueNorth Neurosynaptic System, can provide an appealing platform for deploying numerical algorithms. For example, a recurrent Hopfield neural network can be used to find the Moore-Penrose generalized inverse of a matrix, thus enabling a broad class of linear optimizations to be solved efficiently, at low energy cost. However, deploying numerical algorithms on hardware platforms that severely limit the range and precision of representation for numeric quantities can be quite challenging. This paper discusses these challenges and proposes a rigorous mathematical framework for reasoning about range and precision on such substrates. The paper derives techniques for normalizing inputs and properly quantizing synaptic weights originating from arbitrary systems of linear equations, so that solvers for those systems can be implemented in a provably correct manner on hardware-constrained neural substrates. The analytical model is empirically validated on the IBM TrueNorth platform, and results show that the guarantees provided by the framework for range and precision hold under experimental conditions. Experiments with optical flow demonstrate the energy benefits of deploying a reduced-precision and energy-efficient generalized matrix inverse engine on the IBM TrueNorth platform, reflecting 10× to 100× improvement over FPGA and ARM core baselines.},
}
% Invited article, Journal of Low Power Electronics 14(2), 2018.
% The journal field holds only the venue name; the year lives in year/date.
@article{zha2018JOLPE,
  author    = {Yue Zha and Jing Li},
  title     = {Specialization: A New Path towards Low Power (INVITED)},
  journal   = {ASP Journal of Low Power Electronics},
  volume    = {14},
  number    = {2},
  year      = {2018},
  date      = {2018-02-15},
  doi       = {10.1166/jolpe.2018.1559},
  tppubtype = {article},
  keywords  = {journal},
}
% IEEE Computer Architecture Letters 17(2), 2018: analytical approach to associative processing.
% Page range uses the double-hyphen form, like the other entries in this file.
@article{khoram2018CAL,
  author   = {Khoram, Soroosh and Zha, Yue and Li, Jing},
  title    = {An Alternative Analytical Approach to Associative Processing (Best of CAL)},
  journal  = {IEEE Computer Architecture Letters},
  volume   = {17},
  number   = {2},
  pages    = {113--116},
  year     = {2018},
  month    = {July},
  date     = {2018-01-03},
  doi      = {10.1109/LCA.2018.2789424},
  issn     = {1556-6056},
  keywords = {journal, Analytical models,Complexity theory,Computational modeling,Computer architecture,Parallel processing,Runtime,Two dimensional displays,Analysis of Algorithms and Problem Complexity,Associative Processors,Modeling techniques,Models of Computation},
  abstract = {Associative Processing (AP) is a promising alternative to the Von Neumann model as it addresses the memory wall problem through its inherent in-memory computations. However, because of the countless design parameter choices, comparisons between implementations of two so radically different models are challenging for simulation-based methods. To tackle these challenges, we develop an alternative analytical approach based on a new concept called architecturally-determined complexity. Using this method, we asymptotically evaluate the runtime/storage/energy bounds of the two models, i.e., AP and Von Neumann. We further apply the method to gain more insights into the performance bottlenecks of traditional AP and develop a new machine model named Two Dimensional AP to address these limitations. Finally, we experimentally validate our analytical method and confirm that the simulation results match our theoretical projections.},
}
% IEEE Computer Architecture Letters 17(1): CMA complex matching accelerator.
% Page range uses the double-hyphen form, like the other entries in this file.
@article{zha2017CALCMA,
  author   = {Yue Zha and Jing Li},
  title    = {{CMA}: A Reconfigurable Complex Matching Accelerator for Wire-speed Network Intrusion Detection},
  journal  = {IEEE Computer Architecture Letters},
  volume   = {17},
  number   = {1},
  pages    = {33--36},
  year     = {2017},
  date     = {2017-07-03},
  doi      = {10.1109/LCA.2017.2719023},
  issn     = {1556-6056},
  keywords = {journal, Computer architecture,Coprocessors,Encoding,IP networks,Intrusion detection,Ports (Computers),Accelerator,Intrusion Detection,Network Security,ReRAM,TCAM},
}
% IEEE Computer Architecture Letters 16(2), 2017: IMEC in-memory computing fabric.
@article{zha2017CALIMEC,
  author   = {Yue Zha and Jing Li},
  title    = {{IMEC}: A Fully Morphable In-Memory Computing Fabric Enabled by Resistive Crossbar},
  journal  = {IEEE Computer Architecture Letters},
  volume   = {16},
  number   = {2},
  pages    = {123--126},
  year     = {2017},
  month    = {Feb},
  date     = {2017-02-22},
  doi      = {10.1109/LCA.2017.2672558},
  issn     = {1556-6056},
  keywords = {journal, Decoding,Energy efficiency,Field programmable gate arrays,Nonvolatile memory,Program processors,Non-volatile memory,TCAM,energy-efficiency computing,processing-in-memory},
}
% Invited JSSC 49(4), 2014: 1 Mb 2T-2R nonvolatile TCAM.
% The exponent in the title uses \textsuperscript because a bare caret outside math mode is a LaTeX error.
@article{li2014jssc,
  author   = {Jing Li and Robert Montoye and Masatoshi Ishii and Leland Chang},
  title    = {1 {Mb} 0.41 um\textsuperscript{2} {2T-2R} cell nonvolatile {TCAM} with two-bit encoding and clocked self-referenced sensing (INVITED)},
  journal  = {IEEE Journal of Solid-State Circuits},
  volume   = {49},
  number   = {4},
  pages    = {896--907},
  year     = {2014},
  month    = {April},
  doi      = {10.1109/JSSC.2013.2292055},
  issn     = {0018-9200},
  keywords = {journal, content-addressable storage,encoding,phase change memories,2T 2R cell nonvolatile TCAM,CMOS technology,algorithmic mapping,clocked self referenced sensing,phase change memory technology,resistive memories,size 90 nm,time 1.9 ns,two bit encoding,Arrays,Encoding,Microprocessors,Phase change materials,Random access memory,Sensors,Associative computing,encoding,hardware accelerator,intrusion detection,matchline compensation,nonvolatile,packet classification,phase change memory (PCM),search engine,self-referenced sensing,ternary content addressable memory (TCAM)},
  abstract = {This work demonstrates the first fabricated 1 Mb nonvolatile TCAM using 2-transistor/2-resistive-storage (2T-2R) cells to achieve >10× smaller cell size than SRAM-based TCAMs at the same technology node. The test chip was designed and fabricated in IBM 90 nm CMOS technology and mushroom phase-change memory (PCM) technology. The primary challenge for enabling reliable array operation with such aggressive cell is presented, namely, severely degraded sensing margin due to significantly lower ON/OFF ratio of resistive memories (~10^2 for PCM) than that of traditional MOSFETs (>10^5 ). To address this challenge, two enabling techniques were developed and implemented in hardware: 1) two-bit encoding and 2) a clocked self-referenced sensing scheme (CSRSS). In addition, the two-bit encoding can also improve algorithmic mapping by effectively compressing TCAM entries. The 1 Mb chip demonstrates reliable low voltage search operation (VDDmin ~750 mV) and a match delay of 1.9 ns under nominal operating conditions.},
}
% Thin Solid Films 536 (2013): GeSbTe phase transition on silicon nitride.
% Category tag and topic keywords share one keywords field; a repeated field means one list is dropped by the parser.
@article{cil2013thinfilm,
  author    = {Cil, K and Zhu, Y and Li, Jing and Lam, CH and Silva, H},
  title     = {Assisted cubic to hexagonal phase transition in GeSbTe thin films on silicon nitride},
  journal   = {Thin Solid Films},
  volume    = {536},
  pages     = {216--219},
  year      = {2013},
  publisher = {Elsevier},
  issn      = {0040-6090},
  doi       = {10.1016/j.tsf.2013.03.087},
  url       = {http://www.sciencedirect.com/science/article/pii/S0040609013005476},
  keywords  = {journal, Phase change memory, Germanium–antimony–tellurium, Phase transition temperature, Face-centered cubic, Hexagonal close-packed, Substrate dependence, Silicon nitride, Silicon dioxide},
}
% IEEE Transactions on Electron Devices 59(11), 2012: work-function variability in MOS devices.
@article{zhang2012TED,
  author   = {Xiao Zhang and Jerome Mitard and Lars-Ake Ragnarsson and Tomas Hoffmann and Michael Deal and Melody E. Grubbs and Jing Li and Blanka Magyari-Kope and Bruce M. Clemens and Yoshio Nishi},
  title    = {Theory and Experiments of the Impact of Work-Function Variability on Threshold Voltage Variability in {MOS} Devices},
  journal  = {IEEE Transactions on Electron Devices},
  volume   = {59},
  number   = {11},
  pages    = {3124--3126},
  year     = {2012},
  month    = {Nov},
  doi      = {10.1109/TED.2012.2212021},
  issn     = {0018-9383},
  keywords = {journal, MOSFET,failure analysis,probability,random-access storage,semiconductor device models,semiconductor device reliability,MOS devices,MOSFET,WFV,grain orientation,polycrystalline metal gate,random dopant fluctuation,size 22 nm,static RAM failure probability,threshold voltage variability,work-function variability,Integrated circuit modeling,Logic gates,Random access memory,Resource description framework,Semiconductor device modeling,MOSFETS,Metal gate,variability,work function (WF)},
}
% Nanotechnology 23(22), 2012: heater-recess and load matching in PCM mushroom cells.
@article{cywar2012nano,
  author   = {Adam Cywar and Jing Li and Chung Lam and Helena Silva},
  title    = {The impact of heater-recess and load matching in phase change memory mushroom cells},
  journal  = {Nanotechnology},
  volume   = {23},
  number   = {22},
  pages    = {225201},
  year     = {2012},
  date     = {2012-05-10},
  doi      = {10.1088/0957-4484/23/22/225201},
  url      = {http://stacks.iop.org/0957-4484/23/i=22/a=225201},
  keywords = {journal},
}
% Invited article, Science China Information Sciences 54(5), 2011: phase change memory.
@article{li2011sciencechina,
  author   = {Li, Jing and Lam, Chung},
  title    = {Phase change memory (INVITED)},
  journal  = {Science China Information Sciences},
  volume   = {54},
  number   = {5},
  pages    = {1061--1072},
  year     = {2011},
  month    = {May},
  day      = {01},
  issn     = {1869-1919},
  doi      = {10.1007/s11432-011-4223-x},
  url      = {https://doi.org/10.1007/s11432-011-4223-x},
  keywords = {journal},
  abstract = {Phase change memory (PCM) is a non-volatile solid-state memory technology based on the large resistivity contrast between the amorphous and crystalline states in phase change materials. We present the physics behind this large resistivity contrast and describe how it is being exploited to create high density PCM. We address the challenges facing this technology, including the design of PCM cells, fabrication, device variability, thermal cross-talk and write disturb. We discuss the scalability, assess the performance, and examine the reliability of PCM including data retention, multi-bit storage and endurance.},
}
% IEEE TVLSI 18(12), 2010: design paradigm for robust STT MRAM.
@article{li2010tvlsi,
  author   = {Jing Li and Patrick Ndai and Ashish Goel and Sayeef Salahuddin and Kaushik Roy},
  title    = {Design Paradigm for Robust Spin-Torque Transfer Magnetic {RAM} ({STT} {MRAM}) From Circuit/Architecture Perspective (Best Paper)},
  journal  = {IEEE Transactions on Very Large Scale Integration (VLSI) Systems},
  volume   = {18},
  number   = {12},
  pages    = {1710--1723},
  year     = {2010},
  month    = {Dec},
  doi      = {10.1109/TVLSI.2009.2027907},
  issn     = {1063-8210},
  keywords = {journal, integrated circuit design,magnetic storage,random-access storage,high memory yield,parametric failures,process variations,robust spin-torque transfer magnetic RAM,Circuit stability,Costs,Failure analysis,Flash memory,Magnetic circuits,Performance analysis,Random access memory,Read-write memory,Robustness,Scalability,Spin-torque transfer (STT),magnetic ram (MRAM),memory yield,parametric failures},
}
% IEEE TVLSI 18(11), 2010: variable-latency adder designs.
@article{chen2010tvlsi,
  author   = {Yiran Chen and Hai Li and Cheng-Kok Koh and Guangyu Sun and Jing Li and Yuan Xie and Kaushik Roy},
  title    = {Variable-Latency Adder ({VL-Adder}) Designs for Low Power and {NBTI} Tolerance},
  journal  = {IEEE Transactions on Very Large Scale Integration (VLSI) Systems},
  volume   = {18},
  number   = {11},
  pages    = {1621--1624},
  year     = {2010},
  month    = {Nov},
  doi      = {10.1109/TVLSI.2009.2026280},
  issn     = {1063-8210},
  keywords = {journal, adders,digital arithmetic,integrated circuit design,logic design,IC design,NBTI tolerance,circuit delay,digital arithmetic,logic design,negative bias temperature instability,variable-latency adder designs,word length 64 bit,Adders,Circuits,Clocks,Delay,Negative bias temperature instability,Niobium compounds,Sun,Throughput,Titanium compounds,Very large scale integration,Digital arithmetic,IC design,logic design},
}
% IEEE TCAD 28(1), 2009: variation estimation and compensation in scaled LTPS TFT circuits.
% A single keywords field carries the category tag and the topic terms; a repeated field means one list is dropped by the parser.
@article{li2009tcad,
  author   = {Jing Li and Kunhyuk Kang and Kaushik Roy},
  title    = {Variation Estimation and Compensation Technique in Scaled {LTPS} {TFT} Circuits for Low-Power Low-Cost Applications},
  journal  = {IEEE Transactions on Computer-Aided Design of Integrated Circuits and Systems},
  volume   = {28},
  number   = {1},
  pages    = {46--59},
  year     = {2009},
  month    = {Jan},
  doi      = {10.1109/TCAD.2008.2009149},
  issn     = {0278-0070},
  keywords = {journal, CMOS integrated circuits,circuit reliability,elemental semiconductors,low-power electronics,silicon,statistical analysis,thin film transistors,CMOS technology,Si,circuit reliability,compensation technique,delay variation,four-finger structure,inverter chain,low-power low-cost application,low-temperature polycrystalline-silicon thin-film transistor,multifinger design technique,multimodal delay distribution,response surface method,statistical simulation methodology,unimodal distribution,variation estimation,CMOS logic circuits,CMOS technology,Circuit simulation,Delay,Grain boundaries,Logic devices,Response surface methodology,Robustness,Substrates,Thin film transistors,Grain boundary (GB),low-temperature polycrystalline-silicon (LTPS),process variation,thin-film transistor (TFT)},
}
% Invited article, ACM JETC 4(3), 2008: hybrid systems using scaled LTPS TFTs.
% Category tag and topic keywords share one keywords field; a repeated field means one list is dropped by the parser.
@article{li2008jetc,
  author     = {Li, Jing and Bansal, Aditya and Ghosh, Swarop and Roy, Kaushik},
  title      = {An Alternate Design Paradigm for Low-power, Low-cost, Testable Hybrid Systems Using Scaled {LTPS} {TFTs} (INVITED)},
  journal    = {J. Emerg. Technol. Comput. Syst.},
  issue_date = {August 2008},
  volume     = {4},
  number     = {3},
  pages      = {13:1--13:19},
  articleno  = {13},
  numpages   = {19},
  year       = {2008},
  month      = {Aug},
  issn       = {1550-4832},
  doi        = {10.1145/1389089.1389093},
  url        = {http://doi.acm.org/10.1145/1389089.1389093},
  acmid      = {1389093},
  publisher  = {ACM},
  address    = {New York, NY, USA},
  keywords   = {journal, 3D integration, BIST, DFT, Low-temperature polycrystalline silicon (LTPS), generic, grain boundary (GB), hybrid system, inherent variation, reconfigurable, thin-film transistor (TFT)},
}
% IEEE Transactions on Electron Devices 54(11), 2007: poly-Si TFTs for digital operation.
% Page range uses the double-hyphen form, like the other entries in this file.
@article{li2007ted,
  author   = {Jing Li and Aditya Bansal and Kaushik Roy},
  title    = {{Poly-Si} Thin-Film Transistors: An Efficient and Low-Cost Option for Digital Operation},
  journal  = {IEEE Transactions on Electron Devices},
  volume   = {54},
  number   = {11},
  pages    = {2918--2929},
  year     = {2007},
  month    = {Nov},
  doi      = {10.1109/TED.2007.906940},
  issn     = {0018-9383},
  keywords = {journal, elemental semiconductors,low-power electronics,silicon,silicon-on-insulator,thin film transistors,LTPS TFT,SOI,Si - Interface,driving current,low-temperature polycrystalline-silicon thin-film transistors,midgap trap density,poly-Si thin-film transistors,silicon-on-insulator,single-crystalline silicon,submicrometer ultralow-power digital operation,ultralow-power subthreshold operation,Costs,Design methodology,Design optimization,Energy consumption,Fabrication,Glass,Polymers,Silicon,Substrates,Thin film transistors,Grain boundary (GB),low-pressure chemical vapor deposition (LPCVD),low-temperature polycrystalline silicon (LTPS),thin-film transistor (TFT)},
}
%%%%%%%%%% Refereed Conference %%%%%%%%%%
% ASPLOS '20 paper on FPGA virtualization (pubstate: upcoming).
@inproceedings{zha2020asplos,
  author    = {Zha, Yue and Li, Jing},
  title     = {{ViTAL: Virtualizing FPGAs in the Cloud}},
  booktitle = {the 24th ACM International Conference on Architectural Support for Programming Languages and Operating Systems},
  series    = {**ASPLOS** '20},
  year      = {2020},
  pubstate  = {upcoming},
  note      = {},
  keywords  = {conference},
}
% 2019 IEEE Symposium on VLSI Technology: NbO2-based universal selector.
@inproceedings{luo2019vlsit,
  author    = {Luo, Qing and Yu, Jie and Zhang, Xumeng and Xue, Kan-Hao and Cheng, Yan and Gong, Tiancheng and Lv, Hangbing and Xu, Xiaoxin and Yuan, Peng and Yin, Jiahao and Tai, Lu and Long, Shibing and Liu, Qi and Li, Jing and Liu, Ming},
  title     = {Nb\textsubscript{1-x}O\textsubscript{2} based Universal Selector with Ultra-high Endurance (>10\textsuperscript{12}), high speed (10ns) and Excellent V\textsubscript{th} Stability},
  booktitle = {2019 IEEE Symposium on VLSI Technology},
  year      = {2019},
  month     = {Jun},
  date      = {2019-06-09},
  doi       = {10.23919/VLSIT.2019.8776546},
  note      = {},
  keywords  = {conference},
}
% 2019 IEEE Symposium on VLSI Circuits: Liquid Silicon processing-in-memory processor.
@inproceedings{zha2019vlsic,
  author    = {Zha, Yue and Nowak, Etienne and Li, Jing},
  title     = {{Liquid Silicon}: A Nonvolatile Fully Programmable Processing-In-Memory Processor with Monolithically Integrated {ReRAM} for {Big Data/Machine Learning} Applications},
  booktitle = {2019 IEEE Symposium on VLSI Circuits},
  year      = {2019},
  month     = {Jun},
  date      = {2019-06-09},
  doi       = {10.23919/VLSIC.2019.8778064},
  note      = {},
  keywords  = {conference},
  abstract  = {A nonvolatile fully programmable processing-in-memory (PIM) processor named Liquid Silicon (L-Si) is demonstrated, which combines the superior programmability of general-purpose computing devices (e.g. FPGA) and the high power efficiency of domain-specific accelerators. Besides the general computing applications, L-Si is particularly well suited for AI/machine learning and big data applications, which not only pose high computational/memory demand but also evolves rapidly. L-Si is fabricated by monolithically integrating HfO 2 resistive RAM on top of commercial 130nm Si CMOS. Our measurement confirmed the fabricated chip operates reliably at low voltage of 650 mV. It achieves 60.9 TOPS/W in performing neural network inferences and 480 GOPS/W in performing content-based similarity search (a key big data application) at nominal voltage supply of 1.2V, showing >3× and ~100× power efficiency improvement over the state-of-the-art domain-specific CMOS-/RRAM-based accelerators. In addition, it outperforms the latest nonvolatile FPGA in energy efficiency by ~3× in general compute-intensive applications.},
}
% FCCM 2019 (Best Paper Nominee): MEG, a RISC-V based system simulation infrastructure.
% The DOI carries the full 10.1109 registrant prefix; every DOI starts with the directory indicator "10.".
@inproceedings{zhang2019fccm,
  author    = {Zhang, Jialiang and Liu, Yang and Jain, Gaurav and Zha, Yue and Ta, Jonathan and Li, Jing},
  title     = {{MEG}: {A RISCV}-based system simulation infrastructure for exploring memory optimization using {FPGAs} and {Hybrid Memory Cube} (Best Paper Nominee)},
  booktitle = {2019 IEEE 27th Annual International Symposium on Field-Programmable Custom Computing Machines (**FCCM**)},
  year      = {2019},
  month     = {April},
  date      = {2019-04-28},
  doi       = {10.1109/FCCM.2019.00029},
  note      = {},
  keywords  = {conference},
  abstract  = {Emerging 3D memory technologies, such as the Hybrid Memory Cube (HMC) and High Bandwidth Memory (HBM), provide increased bandwidth and massive memory-level parallelism. Efficiently integrating emerging memories into existing system pose new challenges and require detailed evaluation in a real computing environment. In this paper, we propose MEG, an open-source, configurable, cycle-exact, and RISC-V based full system simulation infrastructure using FPGA and HMC. MEG has three highly configurable design components: (i) a HMC adaptation module that not only enables communication between the HMC device and the processor cores but also can be extended to fit other memories (e.g., HBM, nonvolatile memory) with minimal effort, (ii) a reconfigurable memory controller along with its OS support that can be effectively leveraged by system designers to perform software-hardware co-optimization, and (iii) a performance monitor module that effectively improves the observability and debuggability of the system to guide performance optimization. We provide a prototype implementation of MEG on Xilinx VCU110 board and demonstrate its capability, fidelity, and flexibility on real-world benchmark applications. We hope that our open-source release of MEG fills a gap in the space of publicly-available FPGA-based full system simulation infrastructures specifically targeting memory system and inspires further collaborative software/hardware innovations.},
}
% FPGA '19 poster: soft-logic CNN acceleration via product quantization.
@inproceedings{zhang2019fpga,
  author    = {Zhang, Jialiang and Li, Jing},
  title     = {{Unleashing the Power of Soft Logic for Convolutional Neural Network Acceleration via Product Quantization} (Poster)},
  booktitle = {the 2019 ACM/SIGDA International Symposium on Field-Programmable Gate Arrays},
  series    = {**FPGA** '19},
  year      = {2019},
  month     = {Feb},
  date      = {2019-02-24},
  doi       = {10.1145/3289602.3293951},
  note      = {},
  keywords  = {conference},
}
% FCCM 2018 poster: PQ-CNN, product quantized CNN acceleration.
@inproceedings{zhang2018fccm,
  author    = {Zhang, Jialiang and Li, Jing},
  title     = {{PQ-CNN}: {Accelerating} Product Quantized Convolutional Neural Network (Poster)},
  booktitle = {2018 IEEE 26th Annual International Symposium on Field-Programmable Custom Computing Machines (**FCCM**)},
  year      = {2018},
  month     = {April},
  date      = {2018-04-29},
  doi       = {10.1109/FCCM.2018.00041},
  note      = {},
  keywords  = {conference},
}
@inproceedings{zhang2018cvpr,
  author    = {Zhang, Jialiang and Khoram, Soroosh and Li, Jing},
  title     = {Efficient Large-scale Approximate Nearest Neighbor Search on the {OpenCL-FPGA}},
  booktitle = {Conference on Computer Vision and Pattern Recognition (**CVPR**)},
  year      = {2018},
  month     = {Jun},
  date      = {2018-06},
  pages     = {4924--4932},
  doi       = {10.1109/CVPR.2018.00517},
  keywords  = {conference},
  abstract  = {We present a new method for Product Quantization (PQ) based approximated nearest neighbor search (ANN) in high dimensional spaces. Specifically, we first propose a quantization scheme for the codebook of coarse quantizer, product quantizer, and rotation matrix, to reduce the cost of accessing these codebooks. Our approach also combines a highly parallel k-selection method, which can be fused with the distance calculation to reduce the memory overhead. We implement the proposed method on Intel HARPv2 platform using OpenCL-FPGA. The proposed method significantly outperforms state-of-the-art methods on CPU and GPU for high dimensional nearest neighbor queries on billion-scale datasets in terms of query time and accuracy regardless of the batch size. To our best knowledge, this is the first work to demonstrate FPGA performance superior to CPU and GPU on high-dimensional, large-scale ANN datasets.},
  note      = {(Acceptance Rate: \underline{29\%}, 979 out of over 3300)},
}
@inproceedings{khoram2018iclr,
  author    = {Khoram, Soroosh and Li, Jing},
  title     = {Adaptive Quantization of Neural Networks},
  booktitle = {International Conference on Learning Representations (**ICLR**)},
  year      = {2018},
  month     = {April},
  date      = {2018-04},
  url       = {https://openreview.net/forum?id=SyOK1Sg0W},
  keywords  = {conference},
  abstract  = {Despite the state-of-the-art accuracy of Deep Neural Networks (DNN) in various classification problems, their deployment onto resource constrained edge computing devices remains challenging due to their large size and complexity. Several recent studies have reported remarkable results in reducing this complexity through quantization of DNN models. However, these studies usually do not consider the changes in the loss function when performing quantization, nor do they take the different importances of DNN model parameters to the accuracy into account. We address these issues in this paper by proposing a new method, called adaptive quantization, which simplifies a trained DNN model by finding a unique, optimal precision for each network parameter such that the increase in loss is minimized. The optimization problem at the core of this method iteratively uses the loss function gradient to determine an error margin for each parameter and assigns it a precision accordingly. Since this problem uses linear functions, it is computationally cheap and, as we will show, has a closed-form approximate solution. Experiments on MNIST, CIFAR, and SVHN datasets showed that the proposed method can achieve near or better than state-of-the-art reduction in model size with similar error rates. Furthermore, it can achieve compressions close to floating-point model compression methods without loss of accuracy.},
  note      = {(Acceptance Rate: \underline{34\%}, 314 out of 935)},
}
@inproceedings{li2018CSTIC,
  author    = {Li, Jing},
  title     = {Nonvolatile Memory Outlook: Technology Driven or Application Driven? (INVITED)},
  booktitle = {2018 China Semiconductor Technology International Conference (CSTIC)},
  year      = {2018},
  month     = {March},
  date      = {2018-03-12},
  pages     = {1--4},
  keywords  = {conference},
}
@inproceedings{zha2018asplos,
  author    = {Zha, Yue and Li, Jing},
  title     = {{Liquid Silicon-Monona}: A Reconfigurable Memory-Oriented Computing Fabric with Scalable Multi-Context Support},
  booktitle = {23rd International Conference on Architectural Support for Programming Languages and Operating Systems},
  series    = {**ASPLOS** '18},
  year      = {2018},
  month     = {Mar},
  date      = {2018-03-19},
  location  = {Williamsburg, VA, USA},
  pages     = {214--228},
  volume    = {53},
  issue     = {2},
  publisher = {ACM},
  address   = {New York, NY, USA},
  doi       = {10.1145/3173162.3173167},
  url       = {http://doi.acm.org/10.1145/3173162.3173167},
  keywords  = {conference},
  abstract  = {With the recent trend of promoting Field-Programmable Gate Arrays (FPGAs) to first-class citizens in accelerating compute-intensive applications in networking, cloud services and artificial intelligence, FPGAs face two major challenges in sustaining competitive advantages in performance and energy efficiency for diverse cloud workloads: 1) limited configuration capability for supporting light-weight computations/on-chip data storage to accelerate emerging search-/data-intensive applications. 2) lack of architectural support to hide reconfiguration overhead for assisting virtualization in a cloud computing environment. In this paper, we propose a reconfigurable memory-oriented computing fabric, namely Liquid Silicon-Monona (L-Si), enabled by emerging nonvolatile memory technology i.e. RRAM, to address these two challenges. Specifically, L-Si addresses the first challenge by virtue of a new architecture comprising a 2D array of physically identical but functionally-configurable building blocks. It, for the first time, extends the configuration capabilities of existing FPGAs from computation to the whole spectrum ranging from computation to data storage. It allows users to better customize hardware by flexibly partitioning hardware resources between computation and memory, greatly benefiting emerging search- and data-intensive applications. To address the second challenge, L-Si provides scalable multi-context architectural support to minimize reconfiguration overhead for assisting virtualization. In addition, we provide compiler support to facilitate the programming of applications written in high-level programming languages (e.g. OpenCL) and frameworks (e.g. TensorFlow, MapReduce) while fully exploiting the unique architectural capability of L-Si. Our evaluation results show L-Si achieves 99.6\% area reduction, 1.43× throughput improvement and 94.0\% power reduction on search-intensive benchmarks, as compared with the FPGA baseline. For neural network benchmarks, on average, L-Si achieves 52.3× speedup, 113.9× energy reduction and 81\% area reduction over the FPGA baseline. In addition, the multi-context architecture of L-Si reduces the context switching time to ∼10ns, compared with an off-the-shelf FPGA (∼100ms), greatly facilitating virtualization.},
  note      = {(Acceptance Rate: \underline{18.2\%}, 56 out of 307)},
}
@inproceedings{zhang2018fpga,
  author    = {Zhang, Jialiang and Li, Jing},
  title     = {Degree-aware Hybrid Graph Traversal on {FPGA-HMC} Platform},
  booktitle = {Proceedings of the 2018 ACM/SIGDA International Symposium on Field-Programmable Gate Arrays},
  series    = {**FPGA** '18},
  year      = {2018},
  month     = {Feb},
  date      = {2018-02-25},
  location  = {Monterey, California, USA},
  pages     = {229--238},
  publisher = {ACM},
  address   = {New York, NY, USA},
  doi       = {10.1145/3174243.3174245},
  url       = {http://doi.acm.org/10.1145/3174243.3174245},
  keywords  = {conference, graph processor, hybrid memory cube, bfs},
  abstract  = {Graph traversal is a core primitive for graph analytics and a basis for many higher-level graph analysis methods. However, irregularities in the structure of scale-free graphs (e.g., social network) limit our ability to analyze these important and growing datasets. A key challenge is the redundant graph computations caused by the presence of high-degree vertices which not only increase the total amount of computations but also incur unnecessary random data access. In this paper, we present a graph processing system on an FPGA-HMC platform, based on software/hardware co-design and co-optimization. For the first time, we leverage the inherent graph property i.e. vertex degree to co-optimize algorithm and hardware architecture. In particular, we first develop two algorithm optimization techniques: degree-aware adjacency list reordering and degree-aware vertex index sorting. The former can reduce the number of redundant graph computations, while the latter can create a strong correlation between vertex index and data access frequency, which can be effectively applied to guide the hardware design. We further implement the optimized hybrid graph traversal algorithm on an FPGA-HMC platform. By leveraging the strong correlation between vertex index and data access frequency made by degree-aware vertex index sorting, we develop two platform-dependent hardware optimization techniques, namely degree-aware data placement and degree-aware adjacency list compression. These two techniques together substantially reduce the amount of access to external memory. Finally, we conduct extensive experiments on an FPGA-HMC platform to verify the effectiveness of the proposed techniques. To the best of our knowledge, our implementation achieves the highest performance (45.8 billion traversed edges per second) among existing FPGA-based graph processing systems.},
  note      = {(Acceptance Rate*: \underline{24\%})},
}
@inproceedings{zha2018fpga,
  author    = {Zha, Yue and Li, Jing},
  title     = {{Liquid Silicon}: A Data-Centric Reconfigurable Architecture enabled by {RRAM} Technology},
  booktitle = {Proceedings of the 2018 ACM/SIGDA International Symposium on Field-Programmable Gate Arrays},
  series    = {**FPGA** '18},
  year      = {2018},
  month     = {Feb},
  date      = {2018-02-25},
  location  = {Monterey, California, USA},
  pages     = {51--60},
  publisher = {ACM},
  address   = {New York, NY, USA},
  doi       = {10.1145/3174243.3174244},
  url       = {http://doi.acm.org/10.1145/3174243.3174244},
  keywords  = {conference, monolithic stacking, non-volatile memory, processing-in-memory, reconfigurable architecture, tcam},
  abstract  = {This paper presents a data-centric reconfigurable architecture, namely Liquid Silicon, enabled by emerging non-volatile memory, i.e., RRAM. Compared to the heterogeneous architecture of commercial FPGAs, Liquid Silicon is inherently a homogeneous architecture comprising a two-dimensional (2D) array of identical 'tiles'. Each tile can be configured into one or a combination of four modes: TCAM, logic, interconnect, and memory. Such flexibility allows users to partition resources based on applications' needs, in contrast to the fixed hardware design using dedicated hard IP blocks in FPGAs. In addition to better resource usage, its 'memory friendly' architecture effectively addresses the limitations of commercial FPGAs i.e., scarce on-chip memory resources, making it an effective complement to FPGAs. Moreover, its coarse-grained logic implementation results in shallower logic depth, less inter-tile routing overhead, and thus smaller area and better performance, compared with its FPGA counterpart. Our study shows that, on average, for both traditional and emerging applications, we achieve 62\% area reduction, 27\% speedup and 31\% improvement in energy efficiency when mapping applications onto Liquid Silicon instead of FPGAs.},
  note      = {(Acceptance Rate*: \underline{24\%}, Ranked **\#1** among 100+ submissions)},
}
@inproceedings{khoram2018fpga,
  author    = {Khoram, Soroosh and Zhang, Jialiang and Strange, Maxwell and Li, Jing},
  title     = {Accelerating Graph Analytics By Co-Optimizing Storage and Access on an {FPGA-HMC} Platform},
  booktitle = {Proceedings of the 2018 ACM/SIGDA International Symposium on Field-Programmable Gate Arrays},
  series    = {**FPGA** '18},
  year      = {2018},
  month     = {Feb},
  date      = {2018-02-25},
  location  = {Monterey, California, USA},
  pages     = {239--248},
  publisher = {ACM},
  address   = {New York, NY, USA},
  doi       = {10.1145/3174243.3174260},
  url       = {http://doi.acm.org/10.1145/3174243.3174260},
  keywords  = {conference, graph analytics, graph clustering, hardware accelerators, hybrid memory cube, reconfigurable logic, bfs},
  abstract  = {Graph analytics, which explores the relationships among interconnected entities, is becoming increasingly important due to its broad applicability, from machine learning to social sciences. However, due to the irregular data access patterns in graph computations, one major challenge for graph processing systems is performance. The algorithms, softwares, and hardwares that have been tailored for mainstream parallel applications are generally not effective for massive, sparse graphs from the real-world problems, due to their complex and irregular structures. To address the performance issues in large-scale graph analytics, we leverage the exceptional random access performance of the emerging Hybrid Memory Cube (HMC) combined with the flexibility and efficiency of modern FPGAs. In particular, we develop a collaborative software/hardware technique to perform a level-synchronized Breadth First Search (BFS) on a FPGA-HMC platform. From the software perspective, we develop an architecture-aware graph clustering algorithm that exploits the FPGA-HMC platform's capability to improve data locality and memory access efficiency. From the hardware perspective, we further improve the FPGA-HMC graph processor architecture by designing a memory request merging unit to take advantage of the increased data locality resulting from graph clustering. We evaluate the performance of our BFS implementation using the AC-510 development kit from Micron and achieve $2.8 \times$ average performance improvement compared to the latest FPGA-HMC based graph processing system over a set of benchmarks from a wide range of applications.},
  note      = {(Acceptance Rate*: \underline{24\%})},
}
@inproceedings{zha2017iccad,
  author    = {Zha, Yue and Li, Jing},
  title     = {{RRAM-based} reconfigurable in-memory computing architecture with hybrid routing},
  booktitle = {2017 IEEE/ACM International Conference on Computer-Aided Design (ICCAD)},
  series    = {**ICCAD** '17},
  year      = {2017},
  month     = {Nov},
  date      = {2017-11-13},
  pages     = {527--532},
  issn      = {1558-2434},
  doi       = {10.1109/ICCAD.2017.8203822},
  keywords  = {conference, Architecture,Delays,Field programmable gate arrays,Logic functions,Routing,Switches,Tiles,CAD Framework,Hybrid Routing,In-Memory Computing,Reconfigurable Architecture,liquid Silicon},
  abstract  = {Recent advances in resistive random-access memory (RRAM) evoke great interests in exploring alternative architectures. One interesting work is a RRAM-based reconfigurable architecture that provides superior programmability and blurs the boundary between computation and storage, but long-distance routing becomes a performance bottleneck. However, long-distance routing in FPGA is efficiently implemented, but its fine-grained routing structure results in a large routing overhead. In this work, we present a RRAM-based reconfigurable architecture that addresses the routing challenges using hybrid routing, i.e., local and global routing by taking the best advantages of both architectures (prior RRAM-based and FPGA). We also provide a complete CAD framework that exhibits high parallelism and good scalability. Experimental results show that our reconfigurable architecture outperforms both architectures. It achieves a 46.88\% reduction in delay and improves the energy efficiency by 66.23\% compared with the prior RRAM-based architecture with a slightly increased area overhead. While comparing with FPGA, it reduces the delay and the routing overhead by 36.00\% and 50.20\%, respectively. Additionally, our CAD framework achieves 5.39x speedup, compared with the prior framework.},
  note      = {(Acceptance Rate: \underline{26\%}, 105 out of 399)},
}
@inproceedings{khoram2017fccm,
  author    = {Khoram, Soroosh and Zhang, Jialiang and Strange, Maxwell and Li, Jing},
  title     = {Accelerating Large-Scale Graph Analytics with {FPGA} and {HMC} (Poster)},
  booktitle = {2017 IEEE 25th Annual International Symposium on Field-Programmable Custom Computing Machines (**FCCM**)},
  year      = {2017},
  month     = {April},
  date      = {2017-04-30},
  pages     = {82--82},
  doi       = {10.1109/FCCM.2017.58},
  keywords  = {conference, field programmable gate arrays,graph theory,information retrieval,learning (artificial intelligence),social sciences,tree searching,BFS,FPGA-HMC based graph processing system,breadth first search,hybrid memory cube,interconnected entities,irregular data access pattern,large-scale graph analytics,machine learning,massive-scale sparse graphs,social science,Acceleration,Clustering algorithms,Field programmable gate arrays,Hardware,Merging,Software,Software algorithms,Breadth-First Search,Graph Clustering,Hybrid memory Cube},
  note      = {(Acceptance Rate: \underline{25\%}, 32 out of 128)},
}
@inproceedings{khoram2017ISPD,
  author    = {Khoram, Soroosh and Zha, Yue and Zhang, Jialiang and Li, Jing},
  title     = {Challenges and Opportunities: From Near-memory Computing to In-memory Computing (INVITED)},
  booktitle = {Proceedings of the 2017 ACM on International Symposium on Physical Design},
  series    = {**ISPD** '17},
  year      = {2017},
  month     = {Mar},
  date      = {2017-03-19},
  location  = {Portland, Oregon, USA},
  pages     = {43--46},
  numpages  = {4},
  isbn      = {978-1-4503-4696-2},
  publisher = {ACM},
  address   = {New York, NY, USA},
  doi       = {10.1145/3036669.3038242},
  url       = {http://doi.acm.org/10.1145/3036669.3038242},
  acmid     = {3038242},
  keywords  = {conference, 3d integration, in-memory processing, near-memory processing, nonvolatile memory},
  abstract  = {The confluence of the recent advances in technology and the ever-growing demand for large-scale data analytics created a renewed interest in a decades-old concept, processing-in-memory (PIM). PIM, in general, may cover a very wide spectrum of compute capabilities embedded in close proximity to or even inside the memory array. In this paper, we present an initial taxonomy for dividing PIM into two broad categories: 1) Near-memory processing and 2) In-memory processing. This paper highlights some interesting work in each category and provides insights into the challenges and possible future directions.},
  note      = {(Acceptance Rate*: \underline{35\%})},
}
@inproceedings{zha2017CSTIC,
  author    = {Zha, Yue and Wei, Zhiqiang and Li, Jing},
  title     = {Recent progress in {RRAM} technology: From compact models to applications (INVITED)},
  booktitle = {2017 China Semiconductor Technology International Conference (CSTIC)},
  year      = {2017},
  month     = {March},
  date      = {2017-03-12},
  pages     = {1--4},
  doi       = {10.1109/CSTIC.2017.7919731},
  keywords  = {conference, integrated circuit modelling,product development,resistive RAM,IV characteristics,RRAM technology,SCM,commercialization progress,compact model,drop-in replacement,embedded memory,essential electrical-chemical-thermal properties,nonVon Neumann architecture,product development,standalone memory,storage class memory,switching dynamics,Computational modeling,Computer architecture,Hidden Markov models,Mathematical model,Random access memory,Resistance,Switches},
}
@inproceedings{zha2017dacwip,
  author    = {Zha, Yue and Li, Jing},
  title     = {{RRAM}-based Reconfigurable In-Memory Computing Architecture with Hybrid Routing (poster)},
  booktitle = {the 54th Annual Design Automation Conference Work-in-Progress},
  series    = {DAC-WIP '17},
  year      = {2017},
  month     = {Jun},
  date      = {2017-06},
  location  = {Austin, TX, USA},
  isbn      = {978-1-4503-4927-7},
  address   = {New York, NY, USA},
  keywords  = {conference},
  note      = {(Acceptance Rate*: \underline{29\%})},
}
@inproceedings{zhang2017fpgaCNN,
  author    = {Zhang, Jialiang and Li, Jing},
  title     = {Improving the Performance of {OpenCL-based FPGA} Accelerator for Convolutional Neural Network},
  booktitle = {Proceedings of the 2017 ACM/SIGDA International Symposium on Field-Programmable Gate Arrays},
  series    = {**FPGA** '17},
  year      = {2017},
  date      = {2017-02-22},
  location  = {Monterey, California, USA},
  pages     = {25--34},
  numpages  = {10},
  isbn      = {978-1-4503-4354-1},
  publisher = {ACM},
  address   = {New York, NY, USA},
  doi       = {10.1145/3020078.3021698},
  url       = {http://doi.acm.org/10.1145/3020078.3021698},
  acmid     = {3021698},
  keywords  = {conference, convolutional neural networks, fpga, hardware accelerator, opencl},
  abstract  = {OpenCL FPGA has recently gained great popularity with emerging needs for workload acceleration such as Convolutional Neural Network (CNN), which is the most popular deep learning architecture in the domain of computer vision. While OpenCL enhances the code portability and programmability of FPGA, it comes at the expense of performance. The key challenge is to optimize the OpenCL kernels to efficiently utilize the flexible hardware resources in FPGA. Simply optimizing the OpenCL kernel code through various compiler options turns out insufficient to achieve desirable performance for both compute-intensive and data-intensive workloads such as convolutional neural networks.
In this paper, we first propose an analytical performance model and apply it to perform an in-depth analysis on the resource requirement of CNN classifier kernels and available resources on modern FPGAs. We identify that the key performance bottleneck is the on-chip memory bandwidth. We propose a new kernel design to effectively address such bandwidth limitation and to provide an optimal balance between computation, on-chip, and off-chip memory access. As a case study, we further apply these techniques to design a CNN accelerator based on the VGG model. Finally, we evaluate the performance of our CNN accelerator using an Altera Arria 10 GX1150 board. We achieve 866 Gop/s floating point performance at 370MHz working frequency and 1.79 Top/s 16-bit fixed-point performance at 385MHz. To the best of our knowledge, our implementation achieves the best power efficiency and performance density compared to existing work.},
  note      = {(Acceptance Rate: \underline{25\%}, 25 out of 101)},
}
@inproceedings{zhang2017fpgaBFS,
  author    = {Zhang, Jialiang and Khoram, Soroosh and Li, Jing},
  title     = {Boosting the Performance of {FPGA-based} Graph Processor Using {Hybrid Memory Cube}: A Case for Breadth First Search},
  booktitle = {Proceedings of the 2017 ACM/SIGDA International Symposium on Field-Programmable Gate Arrays},
  series    = {**FPGA** '17},
  year      = {2017},
  date      = {2017-02-22},
  location  = {Monterey, California, USA},
  pages     = {207--216},
  numpages  = {10},
  isbn      = {978-1-4503-4354-1},
  publisher = {ACM},
  address   = {New York, NY, USA},
  doi       = {10.1145/3020078.3021737},
  url       = {http://doi.acm.org/10.1145/3020078.3021737},
  acmid     = {3021737},
  keywords  = {conference, graph processor, hybrid memory cube, bfs},
  abstract  = {Large graph processing has gained great attention in recent years due to its broad applicability from machine learning to social science. Large real-world graphs, however, are inherently difficult to process efficiently, not only due to their large memory footprint, but also that most graph algorithms entail memory access patterns with poor locality and a low compute-to-memory access ratio. In this work, we leverage the exceptional random access performance of emerging Hybrid Memory Cube (HMC) technology that stacks multiple DRAM dies on top of a logic layer, combined with the flexibility and efficiency of FPGA to address these challenges. To our best knowledge, this is the first work that implements a graph processing system on a FPGA-HMC platform based on software/hardware co-design and co-optimization. We first present the modifications of algorithm and a platform-aware graph processing architecture to perform level-synchronized breadth first search (BFS) on FPGA-HMC platform. To gain better insights into the potential bottlenecks of proposed implementation, we develop an analytical performance model to quantitatively evaluate the HMC access latency and corresponding BFS performance. Based on the analysis, we propose a two-level bitmap scheme to further reduce memory access and perform optimization on key design parameters (e.g. memory access granularity). Finally, we evaluate the performance of our BFS implementation using the AC-510 development kit from Micron. We achieved 166 million edges traversed per second (MTEPS) using GRAPH500 benchmark on a random graph with a scale of 25 and an edge factor of 16, which significantly outperforms CPU and other FPGA-based large graph processors.},
  note      = {(Acceptance Rate: \underline{25\%}, 25 out of 101)},
}
@inproceedings{zha2017FPGAposter,
  author    = {Zha, Yue and Zhang, Jialiang and Wei, Zhiqiang and Li, Jing},
  title     = {A Mixed-Signal Data-Centric Reconfigurable Architecture Enabled by {RRAM} Technology (poster)},
  booktitle = {Proceedings of the 2017 ACM/SIGDA International Symposium on Field-Programmable Gate Arrays},
  series    = {**FPGA** '17},
  year      = {2017},
  date      = {2017-02-22},
  location  = {Monterey, California, USA},
  pages     = {285--285},
  numpages  = {1},
  isbn      = {978-1-4503-4354-1},
  publisher = {ACM},
  address   = {New York, NY, USA},
  doi       = {10.1145/3020078.3021759},
  url       = {http://doi.acm.org/10.1145/3020078.3021759},
  acmid     = {3021759},
  keywords  = {conference, coarse-grained configuration, mixed-signal processing, non-volatile memory, reconfigurable architecture, ternary content addressable memory},
  note      = {(Acceptance Rate: \underline{25\%}, 25 out of 101)},
}
@inproceedings{zha2016ICCAD,
  author       = {Zha, Yue and Li, Jing},
  title        = {Reconfigurable in-memory computing with resistive memory crossbar},
  booktitle    = {Proceedings of the 35th International Conference on Computer-Aided Design},
  series       = {**ICCAD** '16},
  year         = {2016},
  date         = {2016-11-07},
  location     = {Austin, Texas},
  pages        = {120:1--120:8},
  isbn         = {978-1-4503-4466-1},
  organization = {ACM},
  doi          = {10.1145/2966986.2967069},
  url          = {http://dl.acm.org/citation.cfm?id=2967069},
  keywords     = {conference, RRAM, in-memory computing, reconfigurable},
  abstract     = {Driven by recent advances in resistive random-access memory (RRAM), there have been growing interests in exploring alternative computing concept, i.e., in-memory processing, to address the classical von Neumann bottlenecks. Despite of their great promise in improving performance and energy efficiency, most existing works are built on the inherent matrix-vector multiplication capability of RRAM crossbar structure, and thus lack the flexibility to adapt to future market/technology induced changes in data-intensive applications. To address these challenges, we propose an in-memory reconfigurable architecture based on RRAM crossbar structure. For the first time, it achieves a full programmability across computation and storage, and thereby provides more flexibilities of partitioning the hardware resources based on applications' needs. We further develop two complete CAD design flows to facilitate development of applications written in hardware description languages (HDLs) for our architecture, based on: 1) adaption from existing tool set developed for FPGA, 2) a custom tool design optimized towards the new architecture. Our experiments show that, both design flows are effective in exploiting flexible resources offered by our architecture and thus achieves better efficiency than state-of-art FPGAs (30\% improvement in performance with 66\% reduction in area). In addition, compared to adapted design flow, our custom design flow achieves speedup by 3.3×, and further improves mapping quality.},
  note         = {(Acceptance Rate: \underline{24\%}, 97 out of 408)},
}
@inproceedings{xu2016vlsi,
  author    = {Xu, Xiaoxin and Luo, Qing and Gong, Tiancheng and Lv, Hangbing and Long, Shibing and Liu, Qi and Chung, Steve S. and Li, Jing and Liu, Ming},
  title     = {Fully {CMOS} compatible {3D} vertical {RRAM} with self-aligned self-selective cell enabling sub-5nm scaling},
  booktitle = {2016 IEEE Symposium on VLSI Technology},
  year      = {2016},
  month     = {June},
  date      = {2016-06},
  pages     = {1--2},
  doi       = {10.1109/VLSIT.2016.7573388},
  keywords  = {conference, CMOS memory circuits,integrated circuit manufacture,resistive RAM,CMOS,RRAM,self-aligned self-selective cell,size 5 nm,vertical resistive switching memory,Etching,Hafnium compounds,Leakage currents,Programming,Resistance,Three-dimensional displays,Threshold voltage},
}
@inproceedings{guan2016irps,
  author    = {Guan, Bochen and Li, Jing},
  title     = {A compact model for {RRAM} including random telegraph noise},
  booktitle = {2016 IEEE International Reliability Physics Symposium (IRPS)},
  year      = {2016},
  month     = {April},
  date      = {2016-04},
  pages     = {MY-5-1--MY-5-4},
  doi       = {10.1109/IRPS.2016.7574621},
  keywords  = {conference, Monte Carlo methods,current fluctuations,electromagnetic interference,integrated circuit design,integrated circuit reliability,random noise,resistive RAM,telegraphy,Monte Carlo method,RRAM circuit reliability,RRAM compact model,RTN effect,current fluctuation,random telegraph noise,tunneling gap,Current measurement,Data models,Electron traps,Fluctuations,Integrated circuit modeling,Mathematical model,Switches,Compact model,RRAM,Random Telegraph Noise},
}
@inproceedings{luo2015iedm,
  author    = {Luo, Qing and Xu, Xiaoxin and Liu, Hongtao and Lv, Hangbing and Gong, Tiancheng and Long, Shibing and Liu, Qi and Sun, Haitao and Banerjee, Writam and Li, Ling and Gao, Jianfeng and Lu, Nianduan and Chung, Steve S. and Li, Jing and Liu, Ming},
  title     = {Demonstration of {3D} vertical {RRAM} with ultra low-leakage, high-selectivity and self-compliance memory cells},
  booktitle = {2015 IEEE International Electron Devices Meeting (**IEDM**)},
  year      = {2015},
  month     = {Dec},
  date      = {2015-12},
  pages     = {10.2.1--10.2.4},
  doi       = {10.1109/IEDM.2015.7409667},
  keywords  = {conference, hafnium compounds,ionic conductivity,leakage currents,mixed conductivity,resistive RAM,3D vertical RRAM,HfO2,HfO2/mixed ionic and electronic conductor bilayer,four-layer V-RRAM array,high selectivity,nonlinearity,operation current,self-compliance memory cells,self-selective cell,ultra low-leakage,ultra-low half-select leakage,Hafnium compounds,Leakage currents,Optical switches,Resistance,Three-dimensional displays,Tin},
  note      = {(Acceptance Rate*: \underline{33\%})},
}
@inproceedings{li2015iscas,
  author    = {Li, Jing},
  title     = {Enabling phase-change memory for data-centric computing: Technology, circuit and system (INVITED)},
  booktitle = {2015 IEEE International Symposium on Circuits and Systems (ISCAS)},
  year      = {2015},
  month     = {May},
  date      = {2015-05},
  pages     = {21--24},
  issn      = {0271-4302},
  doi       = {10.1109/ISCAS.2015.7168560},
  keywords  = {conference, Big Data,computer centres,content-addressable storage,memory architecture,phase change memories,Big Data problems,NVM technology,PCM technology,TCAM,computing stack,cost-per-bit factor,data manipulation,data storage,data-centric computing,data-intensive applications,endurance factor,hardware features,nonvolatile memory technology,performance factor,phase-change memory,power factor,retention factor,ternary content addressable memory,Encoding,Hardware,Nonvolatile memory,Phase change materials,Phase change memory,Random access memory,Reliability,Emerging Nonvolatile Memory,PCM,TCAM,Ternary Content Addressable Memory,data-centric system,near-/in-memory computing,phase change memory},
}
@inproceedings{li2013vlsi,
  author    = {Li, Jing and Montoye, Robert and Ishii, Masatoshi and Stawiasz, Kevin and Nishida, Takeshi and Maloney, Kim and Ditlow, Gary and Lewis, Scott and Maffitt, Tom and Jordan, Richard and others},
  title     = {{1Mb} 0.41 {$\mu$m$^2$} {2T-2R} cell nonvolatile {TCAM} with two-bit encoding and clocked self-referenced sensing (Highlight Paper of the Year)},
  booktitle = {2013 Symposium on VLSI Circuits},
  year      = {2013},
  month     = {June},
  date      = {2013-06-12},
  pages     = {C104--C105},
  issn      = {2158-5601},
  keywords  = {conference, CMOS memory circuits,SRAM chips,clocks,content-addressable storage,integrated circuit design,integrated circuit reliability,low-power electronics,phase change memories,search problems,2-transistor-2-resistive-storage cells,2T-2R cells,CSRSS,IBM CMOS technology,PCM process,SRAM-based TCAM,bit rate 1 Mbit/s,cell nonvolatile TCAM,cell size,clocked self-referenced sensing scheme,compact cells,fabricated nonvolatile TCAM,low voltage search operation,match delay,mushroom phase-change memory process,reliable search operation,size 90 nm,technology node,test chip design,two-bit encoding,Arrays,Clocks,Encoding,Microprocessors,Phase change materials,Sensors},
  abstract  = {This work demonstrates the first fabricated nonvolatile TCAM using 2-transistor/2-resistive-storage (2T-2R) cells to achieve >10× smaller cell size than SRAM-based TCAMs at the same technology node. The test chip was designed and fabricated in IBM 90nm CMOS technology and mushroom phase-change memory (PCM) process. To ensure reliable search operation with such compact cells, two enabling techniques were developed and implemented in hardware: 1) two-bit encoding, and 2) a clocked self-referenced sensing scheme (CSRSS). The 1Mb chip demonstrates reliable low voltage search operation (VDDmin~750mV) and a match delay of 1.9 ns under nominal operating conditions.},
  note      = {(Acceptance Rate: \underline{27\%}, 109 out of 396)},
}
% Meza, Li and Mutlu, ICCD 2012: the case for small row buffers in non-volatile main memories.
% Empty volume/number placeholders were dropped; the note now uses the same
% "Acceptance Rate" capitalisation as the other entries in this file.
@inproceedings{meza2012iccd,
  author    = {Justin Meza and Jing Li and Onur Mutlu},
  title     = {A case for small row buffers in non-volatile main memories},
  booktitle = {2012 IEEE 30th International Conference on Computer Design (**ICCD**)},
  year      = {2012},
  date      = {2012-09-30},
  month     = {Sept},
  pages     = {484--485},
  doi       = {10.1109/ICCD.2012.6378685},
  issn      = {1063-6404},
  keywords  = {conference, DRAM chips,buffer circuits,multiprocessing systems,DRAM baseline,DRAM chips,DRAM-based main memories,NVM technologies,array access,buffered data,chip costs,data mapping schemes,main memory dynamic energy,memory array access,memory parallelism,multicore architectures,nonvolatile main memories,read operations,row buffer size,small row buffers,system-level trends,Arrays,Memory management,Nonvolatile memory,Organizations,Phase change materials,Random access memory},
  abstract  = {DRAM-based main memories have read operations that destroy the read data, and as a result, must buffer large amounts of data on each array access to keep chip costs low. Unfortunately, system-level trends such as increased memory contention in multi-core architectures and data mapping schemes that improve memory parallelism lead to only a small amount of the buffered data to be accessed. This makes buffering large amounts of data on every memory array access energy-inefficient; yet organizing DRAM chips to buffer small amounts of data is costly, as others have shown. Emerging non-volatile memories (NVMs) such as PCM, STT-RAM, and RRAM, however, do not have destructive read operations, opening up opportunities for employing small row buffers without incurring additional area penalty and/or design complexity. In this work, we discuss and evaluate architectural changes to enable small row buffers at a low cost in NVMs. We find that on a multi-core system, reducing the row buffer size can greatly reduce main memory dynamic energy compared to a DRAM baseline with large row sizes, without greatly affecting endurance, and for some NVM technologies, leads to improved performance.},
  note      = {(Acceptance Rate: \underline{25\%}, 61 out of 241)},
}
% Li, Luan and Lam, IRPS 2012 (invited): resistance drift in phase change memory.
% Empty volume/number placeholders were dropped.
@inproceedings{li2012irps,
  author    = {Jing Li and Binquan Luan and Chung Lam},
  title     = {Resistance drift in phase change memory (INVITED)},
  booktitle = {2012 IEEE International Reliability Physics Symposium (**IRPS**)},
  year      = {2012},
  month     = {April},
  pages     = {6C.1.1--6C.1.6},
  doi       = {10.1109/IRPS.2012.6241871},
  issn      = {1541-7026},
  keywords  = {conference, circuit reliability,molecular dynamics method,phase change memories,MLC PCM,SR,amorphous chalcogenide material,atomic structure,material engineering,mitigation technique,phase change memory,physics model,quantum molecular dynamic simulation,reliability issue,structural relaxation,time dependent resistance drift,Annealing,Kinetic theory,Phase change materials,Resistance,Strontium,Temperature measurement,drift,multi-level cell,phase change memory,structural relaxation},
}
% Du et al., IRPS 2012: impact of melting during reset on phase change memory reliability.
% The doi field, previously glued onto the end of the keywords line, now has its own line.
% Keyword "phasechange memory (PCM)" was corrected to "phase-change memory (PCM)".
% Empty volume/number placeholders were dropped.
@inproceedings{du2012irps,
  author    = {Pei-Ying Du and J. Y. Wu and T. H. Hsu and M. H. Lee and T. Y. Wang and H. Y. Cheng and E. K. Lai and S. C. Lai and H. L. Lung and S. Kim and M. J. BrightSky and Y. Zhu and S. Mittal and R. Cheek and S. Raoux and E. A. Joseph and A. Schrott and Jing Li and Chung Lam},
  title     = {The impact of melting during reset operation on the reliability of phase change memory},
  booktitle = {2012 IEEE International Reliability Physics Symposium (**IRPS**)},
  year      = {2012},
  month     = {April},
  pages     = {6C.2.1--6C.2.6},
  doi       = {10.1109/IRPS.2012.6241872},
  issn      = {1541-7026},
  keywords  = {conference, arrays,circuit reliability,electromigration,melting,phase change memories,segregation,GST-based phase change memory,RESET melting healing effect,SET induced damage,SET operation,control circuits,electromigration,large test chips,operation impact,phase change memory reliability,phase segregation,reset operation,Conductivity,Electromigration,Maintenance engineering,Phase change materials,Phase change memory,Resistance,Tin,Endurance,RESET operation,electromigration,melting,phase-change memory (PCM),reliability,segregation},
}
% Raoux et al., NVMTS 2011: materials engineering for phase change random access memory.
% Keyword typo "materials ewngineering" was corrected to "materials engineering".
% Empty volume/number/ISSN placeholders were dropped; no ISSN is recorded for this entry.
@inproceedings{raoux2011nvmts,
  author    = {Simone Raoux and Huai-Yu Cheng and Jury Sandrini and Jing Li and Jean Jordan-Sweet},
  title     = {Materials engineering for Phase Change Random Access Memory},
  booktitle = {2011 11th Annual Non-Volatile Memory Technology Symposium Proceeding (NVMTS)},
  year      = {2011},
  month     = {Nov},
  pages     = {1--5},
  doi       = {10.1109/NVMTS.2011.6137090},
  keywords  = {conference, X-ray diffraction,antimony alloys,crystallisation,germanium alloys,phase change materials,phase change memories,tellurium alloys,GeSbTe,amorphous phase,crystallization temperature,electrical contrast,materials engineering,phase change random access memory,rhombohedral phase,temperature 200 degC,time resolved X-ray diffraction,Phase Change Materials,Phase Change Random Access Memory},
}
% Kim et al., VLSI-TSA 2012: optimization of programming current for phase change memory endurance.
% Empty volume/number placeholders were dropped.
@inproceedings{kim2012vlsitsa,
  author    = {S. Kim and P. Y. Du and Jing Li and M. Breitwisch and Y. Zhu and S. Mittal and R. Cheek and T. H. Hsu and M. H. Lee and A. Schrott and S. Raoux and H. Y. Cheng and S. C. Lai and J. Y. Wu and T. Y. Wang and E. A. Joseph and E. K. Lai and A. Ray and H. L. Lung and C. Lam},
  title     = {Optimization of programming current on endurance of phase change memory},
  booktitle = {Proceedings of Technical Program of 2012 VLSI Technology, System and Application (**VLSI-TSA**)},
  year      = {2012},
  month     = {April},
  pages     = {1--2},
  doi       = {10.1109/VLSI-TSA.2012.6210122},
  issn      = {1524-766X},
  keywords  = {conference, failure analysis,phase change memories,RESET current margin,endurance cycles,endurance failure modes,material segregation effect,open failure,optimization,phase change memory,phase-dependent open-failure mechanisms,programming conditions,programming current,stuck-SET failure characteristic curves,Current density,Optimization,Phase change materials,Phase change memory,Programming,Resistance},
}
% Li et al., IEDM 2011: physical origins of resistance drift in phase change memory.
% Empty volume/number placeholders were dropped.
@inproceedings{li2011iedm,
  author    = {Jing Li and Binquan Luan and T. H. Hsu and Y. Zhu and G. Martyna and D. Newns and H. Y. Cheng and S. Raoux and H. L. Lung and C. Lam},
  title     = {Explore physical origins of resistance drift in phase change memory and its implication for drift-insensitive materials},
  booktitle = {2011 International Electron Devices Meeting (**IEDM**)},
  year      = {2011},
  month     = {Dec},
  pages     = {12.5.1--12.5.4},
  doi       = {10.1109/IEDM.2011.6131541},
  issn      = {0163-1918},
  keywords  = {conference, amorphous semiconductors,antimony alloys,atomic structure,germanium alloys,phase change materials,phase change memories,tellurium alloys,Ge,Sb,Te,amorphous germanium,atomic structure,drift-insensitive phase change material,electrical characteristics,first principle ab initio method,material-device characterization,phase change memory,resistance drift,tellurium ternary alloys,Conductivity,Phase change materials,Phase change memory,Programming,Resistance,Temperature measurement},
  note      = {(Acceptance Rate*: \underline{33\%})},
}
% Wu et al., IEDM 2011: low power phase change memory with a thermally confined TaN/TiN bottom electrode.
% Empty volume/number placeholders were dropped.
@inproceedings{wu2011iedm,
  author    = {J. Y. Wu and M. Breitwisch and S. Kim and T. H. Hsu and R. Cheek and P. Y. Du and Jing Li and E. K. Lai and Y. Zhu and T. Y. Wang and H. Y. Cheng and A. Schrott and E. A. Joseph and R. Dasaka and S. Raoux and M. H. Lee and H. L. Lung and C. Lam},
  title     = {A low power phase change memory using thermally confined {TaN/TiN} bottom electrode},
  booktitle = {2011 International Electron Devices Meeting (**IEDM**)},
  year      = {2011},
  month     = {Dec},
  pages     = {3.2.1--3.2.4},
  doi       = {10.1109/IEDM.2011.6131479},
  issn      = {0163-1918},
  keywords  = {conference, conductors (electric),electrodes,heat losses,integrated circuit reliability,low-power electronics,phase change memories,tantalum compounds,thermal insulation,titanium compounds,TaN-TiN,current 30 muA,electrical conductivity,electrothermal simulation,low power PCM,low power phase change memory,size 1.5 nm,size 39 nm,storage capacity 256 Mbit,thermal barrier,thermal insulation,thermally confined bottom electrode,Electrodes,Heating,Phase change memory,Solids,Thermal resistance,Tin},
  note      = {(Acceptance Rate*: \underline{33\%})},
}
% Wen et al., ESSCIRC 2011: post-silicon calibration of analog CMOS using phase-change memory cells.
% The stray leading space inside the author value was removed so the first name
% does not carry leading whitespace in parsers that keep the value verbatim.
% Empty volume/number placeholders were dropped.
@inproceedings{wen2011esscirc,
  author    = {Cheng-Yuan Wen and Jeyanandh Paramesh and Larry Pileggi and Jing Li and SangBum Kim and Jonathan Proesel and Chung Lam},
  title     = {Post-silicon calibration of analog {CMOS} using phase-change memory cells},
  booktitle = {2011 Proceedings of the ESSCIRC (**ESSCIRC**)},
  year      = {2011},
  month     = {Sept},
  pages     = {423--426},
  doi       = {10.1109/ESSCIRC.2011.6044997},
  issn      = {1930-8833},
  keywords  = {conference, CMOS analogue integrated circuits,antimony compounds,calibration,chalcogenide glasses,comparators (circuits),elemental semiconductors,germanium compounds,phase change memories,redundancy,silicon,tellurium compounds,Ge2Sb2Te5,IBM CMOS technology,PCRAM mushroom cells,Si,analog CMOS,capacitance 4.41 fF,combinatorial redundancy,digital calibration,embedded GST,nonvolatile phase-change random access memory cells,offset-minimized CMOS comparator,post-manufacturing calibration,post-silicon calibration,power 55.42 muW,size 90 nm,switchable resistances,voltage 1 V,Arrays,CMOS integrated circuits,Calibration,Generators,Phase change random access memory,Redundancy,Resistance},
  note      = {(Acceptance Rate: \underline{38\%}, 121 out of 314)},
}
% Wen et al., 2011 Symposium on VLSI Circuits: non-volatile look-up table design using PCM cells.
% Empty volume/number/doi placeholders were dropped; no DOI is recorded for this entry.
@inproceedings{wen2011vlsi,
  author    = {C. Y. Wen and Jing Li and S. Kim and M. Breitwisch and C. Lam and J. Paramesh and L. T. Pileggi},
  title     = {A non-volatile look-up table design using {PCM} (phase-change memory) cells},
  booktitle = {2011 Symposium on VLSI Circuits - Digest of Technical Papers},
  year      = {2011},
  date      = {2011-06-15},
  month     = {June},
  pages     = {302--303},
  issn      = {2158-5636},
  keywords  = {conference, CMOS memory circuits,antimony compounds,chalcogenide glasses,germanium compounds,logic circuits,phase change memories,programmable circuits,random-access storage,tellurium compounds,CMOS technology,Ge2Sb2Te5,PCM mushroom cell,digital look-up table circuit,nonvolatile logic functions,nonvolatile look-up table design,phase-change memory,programmable logic functions,resistance transformation ratio,size 90 nm,voltage 1 V,CMOS integrated circuits,Logic gates,Phase change materials,Phase change random access memory,Resistance,Table lookup},
  note      = {(Acceptance Rate: \underline{28\%}, 115 out of 409)},
}
% Li et al., IMW 2011: reconfigurable sensing scheme for variable level storage in phase change memory.
% Empty volume/number placeholders were dropped.
@inproceedings{li2011imw,
  author    = {Jing Li and C. I. Wu and S. C. Lewis and J. Morrish and T. Y. Wang and R. Jordan and T. Maffitt and M. Breitwisch and A. Schrott and R. Cheek and H. L. Lung and C. Lam},
  title     = {A Novel Reconfigurable Sensing Scheme for Variable Level Storage in Phase Change Memory},
  booktitle = {2011 3rd IEEE International Memory Workshop (**IMW**)},
  year      = {2011},
  month     = {May},
  pages     = {1--4},
  doi       = {10.1109/IMW.2011.5873227},
  issn      = {2159-483X},
  keywords  = {conference, CMOS digital integrated circuits,NAND circuits,flash memories,phase change memories,2Mcell PCM chip,CMOS technology,NAND flash,analog resistance levels,frequency 50 MHz,phase change memory,reconfigurable sensing scheme,size 90 nm,time 35 mus to 50 mus,time 5 mus,variable level storage,word length 8 bit,Clocks,Electrical resistance measurement,Flash memory,Phase change materials,Radiation detectors,Resistance},
}
% Rajendran et al., IMW 2011: demonstration of CAM and TCAM using phase change devices.
% Keyword typo "PCM decives" was corrected to "PCM devices".
% Empty volume/number placeholders were dropped.
@inproceedings{rajendran2011imw,
  author    = {B. Rajendran and R. W. Cheek and L. A. Lastras and M. M. Franceschini and M. J. Breitwisch and A. G. Schrott and Jing Li and R. K. Montoye and L. Chang and C. Lam},
  title     = {Demonstration of {CAM} and {TCAM} Using Phase Change Devices},
  booktitle = {2011 3rd IEEE International Memory Workshop (**IMW**)},
  year      = {2011},
  month     = {May},
  pages     = {1--4},
  doi       = {10.1109/IMW.2011.5873229},
  issn      = {2159-483X},
  keywords  = {conference, Monte Carlo methods,content-addressable storage,phase change memories,Monte-Carlo simulation,PCM devices,SRAM,TCAM,content addressable memory,phase change devices,phase change memory technology,ternary CAM,Arrays,Computer aided manufacturing,FETs,Phase change materials,Programming,Resistance,Resistors},
}
% Zhang et al., IEDM 2009: metal grain work function variability in dual metal gate MOSFETs and SRAM reliability.
% Empty volume/number placeholders were dropped.
% NOTE(review): the author list contains a raw non-ASCII character; that is fine for
% UTF-8 aware tooling but would need a LaTeX escape under classic 8-bit BibTeX -- confirm the toolchain.
@inproceedings{zhang2009iedm,
  author    = {Xiao Zhang and Jing Li and M. Grubbs and M. Deal and B. Magyari-Köpe and B. M. Clemens and Y. Nishi},
  title     = {Physical model of the impact of metal grain work function variability on emerging dual metal gate {MOSFETs} and its implication for {SRAM} reliability},
  booktitle = {2009 IEEE International Electron Devices Meeting (**IEDM**)},
  year      = {2009},
  date      = {2009-12},
  month     = {Dec},
  pages     = {1--4},
  doi       = {10.1109/IEDM.2009.5424420},
  issn      = {0163-1918},
  keywords  = {conference, MOS integrated circuits,MOSFET,SRAM chips,integrated circuit metallisation,integrated circuit reliability,work function,SRAM reliability,dual metal gate MOSFET,grain orientation difference,metal grain work function variability,polycrystalline metal gate,size 22 nm,Charge carrier density,Circuit analysis,Electrodes,Fluctuations,High K dielectric materials,MOSFETs,Predictive models,Random access memory,Resource description framework,Semiconductor process modeling},
  note      = {(Acceptance Rate*: \underline{33\%})},
}
% Li and Roy, DAC 2009 PhD Forum entry: error resilient STT MRAM design.
@inproceedings{li2009dac,
  author    = {Li, Jing and Roy, Kaushik},
  title     = {Robust Heterogeneous System Design in Spintronics: Error Resilient Spin Torque {MRAM} ({STT MRAM}) Design},
  booktitle = {the 46th Annual Design Automation Conference PHD Forum},
  series    = {**DAC** '09},
  year      = {2009},
  note      = {(Acceptance Rate: \underline{22\%}, 148 out of 684)},
  keywords  = {conference},
}
% Li et al., ASP-DAC 2009: alternate circuit/architecture design paradigm for robust STT MRAM.
% NOTE(review): both address and location are set; biblatex treats address as an
% alias of location, so confirm which value the rendering pipeline should use.
@inproceedings{li2009aspdac,
  author    = {Li, Jing and Ndai, Patrick and Goel, Ashish and Liu, Haixin and Roy, Kaushik},
  title     = {An Alternate Design Paradigm for Robust Spin-torque Transfer Magnetic {RAM} ({STT MRAM}) from Circuit/Architecture Perspective},
  booktitle = {Proceedings of the 2009 Asia and South Pacific Design Automation Conference},
  series    = {**ASP-DAC** '09},
  year      = {2009},
  date      = {2009-01-19},
  month     = {Jan},
  pages     = {841--846},
  numpages  = {6},
  location  = {Yokohama, Japan},
  publisher = {IEEE Press},
  address   = {Piscataway, NJ, USA},
  isbn      = {978-1-4244-2748-2},
  doi       = {10.1109/ASPDAC.2009.4796585},
  url       = {http://dl.acm.org/citation.cfm?id=1509633.1509820},
  acmid     = {1509820},
  keywords  = {conference, stt mram},
  abstract  = {Spin-Torque Transfer Magnetic RAM (STT MRAM) is a promising candidate for future embedded applications. It provides desirable memory attributes such as fast access time, low cost, high density and non-volatility. However, variations in process parameters can lead to a large number of cells to fail, severely affecting the yield of the memory array. In this paper, we provide a thorough analysis of the impact of design parameters on parametric failures due to process variations. To achieve high memory yield without incurring expensive technology modification, we developed an alternate design paradigm ---circuit/architecture co-design --- to take advantage of different levels of design hierarchy (circuit and architecture) to improve the yield and memory density. The technique decouples the conflicting design requirements for read stability/writability and density. Consequently, the memory cell failure probability reduces by 48\% and cell area reduces by 21\% with negligible performance degradation (~0.4\%).},
  note      = {(Acceptance Rate: \underline{33\%}, 116 out of 355)},
}
% Li et al., GSRC Workshop 2009 (poster): variation resilient spin torque transfer MRAM.
% The third author was entered as "Goel Ashish" (family name first without a comma),
% which parses with Ashish as the surname; corrected to "Ashish Goel" to match the
% "Goel, Ashish" spelling used by the li2009aspdac entry.
@inproceedings{li2009gsrc,
  author    = {Jing Li and Patrick Ndai and Ashish Goel and Kaushik Roy},
  title     = {Variation Resilient Spin Torque Transfer {MRAM} (poster)},
  booktitle = {GSRC Workshop},
  year      = {2009},
  month     = {Mar},
  location  = {Dallas, TX, USA},
  keywords  = {conference},
}
% Li and Roy, SRC TECHCON 2008: failure probability modeling and statistical design of STT MRAM arrays.
% NOTE(review): the booktitle repeats the word Technology; verify the official venue name.
@inproceedings{li2008techcon,
  author    = {Jing Li and Kaushik Roy},
  title     = {Modeling of Failure Probability and Statistical Design of Spin-Torque Transfer Magnetic {RAM} ({STT MRAM}) Array for Yield Enhancement},
  booktitle = {SRC Technology and Talent for the 21st Century Technology (TECHCON)},
  year      = {2008},
  keywords  = {conference},
}
% Li et al., CICC 2008: variation-tolerant STT MRAM array for yield enhancement.
% Empty volume/number placeholders were dropped.
@inproceedings{li2008cicc,
  author    = {Jing Li and Haixin Liu and S. Salahuddin and Kaushik Roy},
  title     = {Variation-tolerant Spin-Torque Transfer ({STT}) {MRAM} array for yield enhancement},
  booktitle = {2008 IEEE Custom Integrated Circuits Conference (**CICC**)},
  year      = {2008},
  date      = {2008-09-21},
  month     = {Sept},
  pages     = {193--196},
  doi       = {10.1109/CICC.2008.4672056},
  issn      = {0886-5930},
  keywords  = {conference, Green's function methods,MRAM devices,DRAM,SRAM,flash memories,nonequilibrium Green's function,optimization,variation-tolerant spin-torque transfer MRAM array,yield enhancement,Circuit simulation,Circuit stability,Circuit synthesis,Electrodes,Green's function methods,Magnetic tunneling,Random access memory,Read-write memory,Robust stability,Scalability},
}
% Li et al., DAC 2008: failure probability modeling and statistical design of STT MRAM arrays.
% Empty volume/number placeholders were dropped.
@inproceedings{li2008dac,
  author    = {Jing Li and Charles Augustine and Sayeef Salahuddin and Kaushik Roy},
  title     = {Modeling of failure probability and statistical design of Spin-Torque Transfer Magnetic Random Access Memory ({STT MRAM}) array for yield enhancement},
  booktitle = {2008 45th ACM/IEEE Design Automation Conference (**DAC**)},
  year      = {2008},
  date      = {2008-06-08},
  month     = {June},
  pages     = {278--283},
  doi       = {10.1145/1391469.1391540},
  issn      = {0738-100X},
  keywords  = {conference, failure analysis,magnetic storage,magnetoelectronics,optimisation,random-access storage,coupled electromagnetic dynamics,failure probability,on-chip embedded memories,spin-torque transfer magnetic random access memory,spintronic device,statistical optimization methodology,yield enhancement,Couplings,Failure analysis,Flash memory,Magnetic analysis,Magnetic devices,Predictive models,Probability,Random access memory,Read-write memory,Scalability,STT MRAM,Yield},
  note      = {(Acceptance Rate: \underline{23\%}, 147 out of 639)},
}