@article{5a7e896dfd2943ac8197f0c21099b6da,
title = "Machine-learning-optimized {Cas12a} barcoding enables the recovery of single-cell lineages and transcriptional profiles",
abstract = "The development of CRISPR-based barcoding methods creates an exciting opportunity to understand cellular phylogenies. We present a compact, tunable, high-capacity Cas12a barcoding system called dual acting inverted site array (DAISY). We combined high-throughput screening and machine learning to predict and optimize the 60-bp DAISY barcode sequences. After optimization, top-performing barcodes had ∼10-fold increased capacity relative to the best random-screened designs and performed reliably across diverse cell types. DAISY barcode arrays generated ∼12 bits of entropy and ∼66,000 unique barcodes. Thus, DAISY barcodes—at a fraction of the size of Cas9 barcodes—achieved high-capacity barcoding. We coupled DAISY barcoding with single-cell RNA-seq to recover lineages and gene expression profiles from ∼47,000 human melanoma cells. A single DAISY barcode recovered up to ∼700 lineages from one parental cell. This analysis revealed heritable single-cell gene expression and potential epigenetic modulation of memory gene transcription. Overall, Cas12a DAISY barcoding is an efficient tool for investigating cell-state dynamics.",
keywords = "CRISPR barcoding, Cas12a, PRC2, high throughput screening, lineage tracking, machine learning, melanoma, online learning optimization, single cell genomics, transcriptional memory",
author = "Hughes, Nicholas W. and Qu, Yuanhao and Zhang, Jiaqi and Tang, Weijing and Pierce, Justin and Wang, Chengkun and Agrawal, Aditi and Morri, Maurizio and Neff, Norma and Winslow, Monte M. and Wang, Mengdi and Cong, Le",
note = "Funding Information: We are grateful to members of the Cong and Winslow laboratories and to Jess Hebert, Ravi Dinesh, Sarah Pierce, Feng Pan, Li Zhu, and Will Johnson for support with experiments and discussions on the manuscript. We thank the following scientists: Dr. Feng Zhang (Addgene \# 84739) and Dr. Keith Joung (Addgene \# 107942). This work was supported by the National Institutes of Health (R35-HG011316 to L.C. and R01-CA231253 to M.M.W.), by Donald and Delia Baxter Foundation (to L.C.), and by National Science Foundation (NSF 1953686 to M.W. and 1953415 to L.C.). This material is based upon work supported by the National Science Foundation Graduate Research Fellowship no. 2018261164 (to N.W.H.) and by Simcere Pharmaceutical Group. The computational analysis is supported by NIH 1S10OD023452 to Stanford Genomics Cluster. N.W.H., Y.Q., and L.C. designed and performed experiments, analyzed data, and wrote the manuscript. J.Z. and M.W. performed computational analysis, analyzed data, and wrote the manuscript. W.T. performed computational analysis and analyzed data. J.P., C.W., A.A., and M.M. performed experiments. M.M. and N.N. designed experiments and provided reagents. M.M.W., M.W., and L.C. supervised the research and wrote the manuscript. Stanford University has filed patent applications with L.C. and N.W.H. as inventors on the basis of this work. L.C. is a member of the scientific advisory board of Arbor Biotechnologies. One or more of the authors of this paper self-identifies as an underrepresented ethnic minority in science. Publisher Copyright: {\textcopyright} 2022 The Author(s)",
year = "2022",
month = aug,
day = "18",
doi = "10.1016/j.molcel.2022.06.001",
language = "English (US)",
volume = "82",
number = "16",
pages = "3103--3118.e8",
journal = "Molecular Cell",
issn = "1097-2765",
publisher = "Cell Press",
}