diff --git a/swh.bib b/swh.bib index c03dc5c..b522fc4 100644 --- a/swh.bib +++ b/swh.bib @@ -1,4804 +1,5084 @@

% Publisher name and address macros.
@STRING{pub-mcgraw-hill="Mc{\-}Graw-Hill" }
@STRING{pub-mcgraw-hill:adr="New York, NY, USA" }
@STRING{pub-mit = "The MIT Press" }
@STRING{pub-mit:adr="Cambridge, MA" }

@Article{ 1999-beagle-in-commons,
  Title = {Conceptualizing an information commons},
  Author = {Beagle, Donald},
  Journal = {The Journal of Academic Librarianship},
  Volume = {25},
  Number = {2},
  Pages = {82--89},
  Year = {1999},
  Publisher = {Elsevier}
}

@Article{ 2002-lerner-foss-economics,
  Title = {Some simple economics of open source},
  Author = {Lerner, Josh and Tirole, Jean},
  Journal = {The Journal of Industrial Economics},
  Volume = {50},
  Number = {2},
  Pages = {197--234},
  Year = {2002},
  Publisher = {Wiley Online Library}
}

@InProceedings{ 2004-dyba-ese,
  Title = {Evidence-based software engineering},
  Author = {Kitchenham, Barbara A. and Dyb{\aa}, Tore and J{\o}rgensen, Magne},
  BookTitle = {Software Engineering, 2004. ICSE 2004. Proceedings.
26th International Conference on},
  Pages = {273--281},
  Year = {2004},
  Organization = {IEEE}
}

@InProceedings{ 2006-zeller-msr,
  Title = {Mining metrics to predict component failures},
  Author = {Nagappan, Nachiappan and Ball, Thomas and Zeller, Andreas},
  BookTitle = {Proceedings of the 28th International Conference on Software Engineering},
  Pages = {452--461},
  Year = {2006},
  Organization = {ACM}
}

@InProceedings{ 2007-mockus-reuse,
  Title = {Large-scale code reuse in open source software},
  Author = {Mockus, Audris},
  BookTitle = {{FLOSS}'07: 1st International Workshop on Emerging Trends in {FLOSS} Research and Development},
  Year = {2007},
  Organization = {IEEE}
}

@InProceedings{ mockus2009amassing,
  Author = {Audris Mockus},
  Title = {Amassing and indexing a large sample of version control systems: Towards the census of public source code history},
  BookTitle = {Proceedings of the 6th International Working Conference on Mining Software Repositories, {MSR} 2009},
  Pages = {11--20},
  Publisher = {{IEEE} Computer Society},
  Year = {2009},
  URL = {https://doi.org/10.1109/MSR.2009.5069476},
  DOI = {10.1109/MSR.2009.5069476},
  timestamp = {Wed, 16 Oct 2019 14:14:52 +0200},
  biburl = {https://dblp.org/rec/conf/msr/Mockus09.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}

@Article{ 2008-paskin-doi,
  Title = {Digital object identifier ({DOI}) system},
  Author = {Paskin, Norman},
  Journal = {Encyclopedia of Library and Information Sciences},
  Volume = {3},
  Pages = {1586--1592},
  Year = {2008},
  Publisher = {Taylor \& Francis}
}

@InCollection{ 2008-riehle-foss-growth,
  Title = {The total growth of open source},
  Author = {Deshpande, Amit and Riehle, Dirk},
  BookTitle = {Open Source Development, Communities and Quality},
  Pages = {197--209},
  Year = {2008},
  Publisher = {Springer}
}

% Title uses TeX quotes (``...'') instead of straight double quotes.
@Article{ 2011-cerf-bitrot,
  Author = {Cerf, Vinton G.},
  Journal = {Proceedings of the IEEE},
  Title = {Avoiding ``Bit Rot'': Long-Term Preservation of Digital Information [Point of View]},
  Year = {2011},
Month = {June}, Volume = {99}, Number = {6}, Pages = {915-916}, Abstract = {This paper discusses about the long term preservation of digital information. There is something ultimately satisfying about keeping information in digital form. It does not take up much space. It can be replicated for resilient preservation. It can be searched mechanically. It can be used to combine with other material using digital power tools. But this blissful outlook may not comport with the reality of digital information preservation and interpretation.}, Keywords = {information storage;software engineering;bit rot;digital information interpretation;digital power tool;resilient preservation;Content management;Data storage systems;Digital systems;Information management;Information retrieval;Libraries;Software;Standards}, DOI = {10.1109/JPROC.2011.2124190}, ISSN = {0018-9219} } @Article{ 2011-peng-reproducible-computations, Title = {Reproducible research in computational science}, Author = {Peng, Roger D}, Journal = {Science}, Volume = {334}, Number = {6060}, Pages = {1226}, Year = {2011}, Publisher = {NIH Public Access} } @Article{ 2013-gent-recomputation-manifesto, Author = {Ian P. Gent}, Title = {The Recomputation Manifesto}, Journal = {CoRR}, Year = {2013}, Volume = {abs/1304.3674}, Note = {\url{http://www.software.ac.uk/blog/2013-07-09-recomputation-manifesto}} } @InProceedings{ 2013-kirschenbaum-history-exe, Author = {Matthew Kirschenbaum}, Title = {History.exe: How can we preserve the software of today for historians of tomorrow?}, CrossRef = {2013-preserving-exe}, Note = {\url{http://www.slate.com/articles/arts/culturebox/2013/07/how_will_historians_of_the_future_run_ms_word_97_how_can_we_save_it_for.single.html}} } @Article{ 2014-fursin-collectivemind, Author = {Grigori Fursin and Renato Miceli and Anton Lokhmotov and Michael Gerndt and Marc Baboulin and Allen D. 
Malony and Zbigniew Chamski and Diego Novillo and Davide {Del Vento}},
  Title = {Collective mind: Towards practical and collaborative auto-tuning},
  Journal = {Scientific Programming},
  Year = {2014},
  Volume = {22},
  Number = {4},
  Pages = {309--329},
  URL = {http://dx.doi.org/10.3233/SPR-140396},
  DOI = {10.3233/SPR-140396},
  timestamp = {Tue, 30 Sep 2014 17:29:46 +0200},
  biburl = {http://dblp.uni-trier.de/rec/bib/journals/sp/FursinMLGBMCNV14},
  bibsource = {dblp computer science bibliography, http://dblp.org}
}

% Inherits venue fields from the 2014-digital-preservation parent via CrossRef.
@InProceedings{ 2014-kirschenbaum-software-thing,
  Author = {Matthew Kirschenbaum},
  Title = {Software, It's a Thing},
  CrossRef = {2014-digital-preservation},
  Note = {\url{https://medium.com/@mkirschenbaum/software-its-a-thing-a550448d0ed3}}
}

@TechReport{ 2014-rousseau-tempsdulogiciel,
  Title = {Le temps du logiciel},
  Author = {Magali Fitzgibbon and Luc Grateau and Guillaume Rousseau},
  Institution = {Institut National de la Propri{\'e}t{\'e} Industrielle},
  Year = {2014}
}

@Article{ 2017whitt,
  Author = {Whitt, Richard S.},
  Title = {``Through A Glass, Darkly'' Technical, Policy, and Financial Actions to Avert the Coming Digital Dark Ages},
  Journal = {Santa Clara High Tech. L.J.},
  Year = 2017,
  Volume = 33,
  Number = 2,
  Pages = 117,
  Month = jan,
  Note = {Available at: http://digitalcommons.law.scu.edu/chtlj/vol33/iss2/1}
}

@Article{ anderson2015digital,
  Title = {The digital dark age},
  Author = {Anderson, David},
  Journal = {Communications of the {ACM}},
  Volume = {58},
  Number = {12},
  Pages = {20--23},
  Year = {2015},
  Publisher = {ACM}
}

% Corporate author: the extra braces keep the name from being split into first/last parts.
@Misc{ ark,
  Author = {{The California Digital Library}},
  Title = {Archival Resource Key},
  URL = {http://n2t.net/e/ark_ids.html},
  Year = {2001}
}

@Article{ Arms01,
  Author = {William Y.
Arms},
  Title = {Uniform resource names: handles, {PURLs}, and digital object identifiers},
  Journal = {Communications of the {ACM}},
  Volume = {44},
  Number = {5},
  Pages = {68},
  Year = {2001},
  URL = {http://doi.acm.org/10.1145/374308.375358},
  DOI = {10.1145/374308.375358},
  timestamp = {Thu, 20 Nov 2003 13:05:23 +0100},
  biburl = {http://dblp2.uni-trier.de/rec/bib/journals/cacm/Arms01},
  bibsource = {dblp computer science bibliography, http://dblp.org}
}

% NOTE(review): armscacm2001 carries the same DOI as Arms01; both keys are kept
% in case either one is cited elsewhere.
@Article{ armscacm2001,
  Author = {Arms, William Y.},
  Title = {Uniform Resource Names: Handles, {PURLs}, and Digital Object Identifiers},
  Journal = {Communications of the {ACM}},
  issue_date = {May 2001},
  Volume = {44},
  Number = {5},
  Month = may,
  Year = {2001},
  ISSN = {0001-0782},
  Pages = {68},
  URL = {http://doi.acm.org/10.1145/374308.375358},
  DOI = {10.1145/374308.375358},
  acmid = {375358},
  Publisher = {ACM},
  Address = {New York, NY, USA}
}

@InProceedings{ arnab2006,
  Author = {Arnab, Alapan and Hutchison, Andrew},
  Title = {Verifiable Digital Object Identity System},
  BookTitle = {Proceedings of the ACM Workshop on Digital Rights Management},
  Series = {DRM '06},
  Year = {2006},
  ISBN = {1-59593-555-X},
  Location = {Alexandria, Virginia, USA},
  Pages = {19--26},
  numpages = {8},
  URL = {http://doi.acm.org/10.1145/1179509.1179514},
  DOI = {10.1145/1179509.1179514},
  acmid = {1179514},
  Publisher = {ACM},
  Address = {New York, NY, USA},
  Keywords = {data identity, digital signatures, handle system, identifier, identity verification, vdoi}
}

@Article{ Baker2016,
  DOI = {10.1038/533452a},
  URL = {https://doi.org/10.1038/533452a},
  Year = {2016},
  Month = may,
  Publisher = {Springer Nature},
  Volume = {533},
  Number = {7604},
  Pages = {452--454},
  Author = {Monya Baker},
  Title = {1,500 scientists lift the lid on reproducibility},
  Journal = {Nature}
}

@Article{ Benureau:2018,
  DOI = {10.3389/fninf.2017.00069},
  URL = {https://doi.org/10.3389/fninf.2017.00069},
  Year = {2018},
  Month = jan,
  Publisher = {Frontiers Media {SA}},
  Volume = {11},
  Author =
{Fabien C. Y. Benureau and Nicolas P. Rougier},
  Title = {Re-run, Repeat, Reproduce, Reuse, Replicate: Transforming Code into Scientific Contributions},
  Journal = {Frontiers in Neuroinformatics}
}

% Single-page item: the page is given once rather than as a degenerate range.
@Article{ Biagioli2016,
  Author = {Biagioli, Mario},
  Year = {2016},
  Month = jul,
  Pages = {201},
  Title = {Watch out for cheats in citation game},
  Volume = {535},
  Journal = {Nature},
  DOI = {10.1038/535201a}
}

@Misc{ bitcoin,
  Author = {Satoshi Nakamoto},
  Title = {Bitcoin: A Peer-to-Peer Electronic Cash System},
  URL = {https://bitcoin.org/bitcoin.pdf},
  Year = {2010},
  Note = {retrieved 09 April 2018}
}

@Article{ bizer2009linked-data,
  Title = {Linked data -- the story so far},
  Author = {Bizer, Christian and Heath, Tom and Berners-Lee, Tim},
  Journal = {Semantic services, interoperability and web applications: emerging concepts},
  Pages = {205--227},
  Year = {2009}
}

@Article{ Borgman2012,
  Author = {Christine L. Borgman and Jillian C. Wallis and Matthew S. Mayernik},
  Title = {Who's Got the Data? Interdependencies in Science and Technology Collaborations},
  Journal = {Computer Supported Cooperative Work},
  Volume = {21},
  Number = {6},
  Pages = {485--523},
  Year = {2012},
  URL = {https://doi.org/10.1007/s10606-012-9169-z},
  DOI = {10.1007/s10606-012-9169-z},
  timestamp = {Mon, 05 Jun 2017 20:44:38 +0200},
  biburl = {https://dblp.org/rec/bib/journals/cscw/BorgmanWM12},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}

% Proper nouns are braced so sentence-casing styles keep their capitals.
@Article{ BugRepair2017,
  Author = {Matias Martinez and Thomas Durieux and Romain Sommerard and Jifeng Xuan and Martin Monperrus},
  Title = {Automatic repair of real bugs in {Java}: a large-scale experiment on the {Defects4J} dataset},
  Journal = {Empirical Software Engineering},
  Volume = {22},
  Number = {4},
  Pages = {1936--1964},
  Year = {2017},
  URL = {https://doi.org/10.1007/s10664-016-9470-4},
  DOI = {10.1007/s10664-016-9470-4},
  timestamp = {Sat, 16 Sep 2017 12:05:10 +0200},
  biburl = {http://dblp.uni-trier.de/rec/bib/journals/ese/MartinezDSXM17},
  bibsource = {dblp
computer science bibliography, http://dblp.org}
}

@Article{ cacm:wikidata,
  Title = {Wikidata: a free collaborative knowledgebase},
  Author = {Vrande{\v{c}}i{\'c}, Denny and Kr{\"o}tzsch, Markus},
  Journal = {Communications of the {ACM}},
  Volume = {57},
  Number = {10},
  Pages = {78--85},
  Year = {2014},
  Publisher = {ACM}
}

@InProceedings{ Candoia2016,
  Author = {Nitin M. Tiwari and Ganesha Upadhyaya and Hridesh Rajan},
  Title = {Candoia: a platform and ecosystem for mining software repositories tools},
  BookTitle = {Proceedings of the 38th International Conference on Software Engineering, {ICSE} 2016},
  Pages = {759--764},
  Year = {2016},
  CrossRef = {DBLP:conf/icse/2016c},
  URL = {http://doi.acm.org/10.1145/2889160.2892662},
  DOI = {10.1145/2889160.2892662},
  timestamp = {Fri, 24 Mar 2017 09:33:29 +0100},
  biburl = {http://dblp.uni-trier.de/rec/bib/conf/icse/TiwariUR16},
  bibsource = {dblp computer science bibliography, http://dblp.org}
}

@Article{ castagne2013consider,
  Title = {Consider the Source: The Value of Source Code to Digital Preservation Strategies},
  Author = {Castagn{\'e}, Michel},
  Journal = {SLIS Student Research Journal},
  Volume = {2},
  Number = {2},
  Pages = {5},
  Year = {2013}
}

% NOTE(review): same title, journal, volume, number and pages as 2011-cerf-bitrot;
% both keys are kept in case either one is cited elsewhere.
@Article{ cerf2011avoiding,
  Title = {Avoiding ``Bit Rot'': Long-Term Preservation of Digital Information [Point of View]},
  Author = {Cerf, Vinton G.},
  Journal = {Proceedings of the IEEE},
  Volume = {99},
  Number = {6},
  Pages = {915--916},
  Year = {2011},
  Publisher = {IEEE}
}

@Book{ CGAL,
  Title = {{CGAL} User and Reference Manual},
  Author = {{The CGAL Project}},
  Publisher = {{CGAL Editorial Board}},
  Edition = {{4.14}},
  Year = 2019,
  URL = {https://doc.cgal.org/4.14/Manual/packages.html}
}

@Article{ collberg2014measuring,
  Title = {Measuring reproducibility in computer systems research},
  Author = {Collberg, Christian and Proebsting, Todd and Moraila, Gina and Shankaran, Akash and Shi, Zuoming and Warren, Alex M},
  Journal = {Department of Computer Science, University of Arizona, Tech.
Rep},
  Volume = {37},
  Year = {2014},
  URL = {http://reproducibility.cs.arizona.edu/tr.pdf}
}

@Article{ Collberg2016,
  DOI = {10.1145/2812803},
  URL = {https://doi.org/10.1145/2812803},
  Year = {2016},
  Month = feb,
  Publisher = {ACM},
  Volume = {59},
  Number = {3},
  Pages = {62--69},
  Author = {Christian Collberg and Todd A. Proebsting},
  Title = {Repeatability in computer systems research},
  Journal = {Communications of the {ACM}}
}

% NOTE(review): CollbergCACM2016 carries the same DOI as Collberg2016; both keys
% are kept in case either one is cited elsewhere.
@Article{ CollbergCACM2016,
  Author = {Christian S. Collberg and Todd A. Proebsting},
  Title = {Repeatability in computer systems research},
  Journal = {Communications of the {ACM}},
  Volume = {59},
  Number = {3},
  Pages = {62--69},
  Year = {2016},
  URL = {http://doi.acm.org/10.1145/2812803},
  DOI = {10.1145/2812803},
  timestamp = {Thu, 25 Feb 2016 18:39:51 +0100},
  biburl = {http://dblp.uni-trier.de/rec/bib/journals/cacm/CollbergP16},
  bibsource = {dblp computer science bibliography, http://dblp.org}
}

@Misc{ credit,
  Author = "CASRAI",
  Title = "The {CRediT} Taxonomy",
  Year = "2015",
  URL = "https://casrai.org/credit/",
  Note = "retrieved January 2019"
}

@Misc{ crossref,
  Author = "Crossref",
  Title = "{DOI} Fees",
  Year = "2017",
  URL = "https://web.archive.org/web/20180129114723/https://www.crossref.org/fees/",
  Note = "Online; retrieved 09 April 2018"
}

% Only the proper noun is braced; the bibliography style decides the rest of the casing.
@Article{ Dagstuhl-Artefacts-2016,
  Author = {Bruce R. Childers and Grigori Fursin and Shriram Krishnamurthi and Andreas Zeller},
  Title = {Artifact Evaluation for Publications ({Dagstuhl} Perspectives Workshop 15452)},
  Pages = {29--35},
  Journal = {Dagstuhl Reports},
  ISSN = {2192-5283},
  Year = {2016},
  Volume = {5},
  Number = {11},
  Editor = {Bruce R.
Childers and Grigori Fursin and Shriram Krishnamurthi and Andreas Zeller},
  Publisher = {Schloss Dagstuhl--Leibniz-Zentrum fuer Informatik},
  Address = {Dagstuhl, Germany},
  URL = {http://drops.dagstuhl.de/opus/volltexte/2016/5762},
  urn = {urn:nbn:de:0030-drops-57620},
  DOI = {10.4230/DagRep.5.11.29},
  Annote = {Keywords: Computer systems, artifacts, reproducibility, archive}
}

% NOTE(review): DagstuhlArtifactEvaluation carries the same DOI as
% Dagstuhl-Artefacts-2016; both keys are kept in case either one is cited elsewhere.
@Article{ DagstuhlArtifactEvaluation,
  Author = {Bruce R. Childers and Grigori Fursin and Shriram Krishnamurthi and Andreas Zeller},
  Title = {Artifact Evaluation for Publications ({Dagstuhl} Perspectives Workshop 15452)},
  Pages = {29--35},
  Journal = {Dagstuhl Reports},
  ISSN = {2192-5283},
  Year = {2016},
  Volume = {5},
  Number = {11},
  Editor = {Bruce R. Childers and Grigori Fursin and Shriram Krishnamurthi and Andreas Zeller},
  Publisher = {Schloss Dagstuhl--Leibniz-Zentrum fuer Informatik},
  Address = {Dagstuhl, Germany},
  URL = {http://drops.dagstuhl.de/opus/volltexte/2016/5762},
  urn = {urn:nbn:de:0030-drops-57620},
  DOI = {10.4230/DagRep.5.11.29},
  Annote = {Keywords: Computer systems, artifacts, reproducibility, archive}
}

@Article{ DagstuhlEngineeringResearchSoftware,
  Author = {Alice Allen and Cecilia R. Aragon and Christoph Becker and Jeffrey Carver and Andrei Chis and Beno{\^{\i}}t Combemale and Mike Croucher and Kevin Crowston and Daniel Garijo and Ashish Gehani and Carole A. Goble and Robert Haines and Robert Hirschfeld and James Howison and Kathryn D. Huff and Caroline Jay and Daniel S. Katz and Claude Kirchner and Katie Kuksenok and Ralf L{\"{a}}mmel and Oscar Nierstrasz and Matthew Turk and Rob van Nieuwpoort and Matthew Vaughn and Jurgen J.
Vinju},
  Title = {Engineering Academic Software ({Dagstuhl} Perspectives Workshop 16252)},
  Journal = {Dagstuhl Manifestos},
  Volume = {6},
  Number = {1},
  Pages = {1--20},
  Year = {2017},
  URL = {https://doi.org/10.4230/DagMan.6.1.1},
  DOI = {10.4230/DagMan.6.1.1},
  timestamp = {Wed, 02 Aug 2017 20:15:49 +0200},
  biburl = {http://dblp.uni-trier.de/rec/bib/journals/dagstuhl-manifestos/AllenABCCCCCGGG17},
  bibsource = {dblp computer science bibliography, http://dblp.org}
}

% "and others" is the BibTeX token for et al.; the style renders it.
% The DOI is also given in a DOI field; the original id field is kept.
@Article{ dappert,
  Author = {Dappert, A. and others},
  Title = {Connecting the Persistent Identifier Ecosystem: Building the Technical and Human Infrastructure for Open Research},
  Journal = {Data Science Journal},
  Volume = {16},
  Pages = {28},
  Year = 2017,
  id = {10.5334/dsj-2017-028},
  DOI = {10.5334/dsj-2017-028},
  URL = {http://doi.org/10.5334/dsj-2017-028}
}

@Article{ davidson2006,
  Author = {Davidson, J.},
  Title = {Persistent Identifiers},
  Journal = {DCC Briefing Papers: Introduction to Curation. Edinburgh: Digital Curation Centre. Handle: 1842/3368.},
  Year = 2006,
  id = {Handle: 1842/3368},
  URL = {http://www.dcc.ac.uk/resources/briefing-papers/introduction-curation}
}

@Article{ debsources-ese-2016,
  Author = {Caneill, Matthieu and Daniel M.
Germ{\'a}n and Stefano Zacchiroli},
  Title = {The {Debsources} Dataset: Two Decades of Free and Open Source Software},
  Publisher = {Springer},
  Month = jun,
  Year = {2017},
  ISSN = {1382-3256},
  DOI = {10.1007/s10664-016-9461-5},
  Pages = {1405--1437},
  Volume = {22},
  Journal = {Empirical Software Engineering}
}

@InProceedings{ debsources-esem-2014,
  Author = {Caneill, Matthieu and Stefano Zacchiroli},
  Title = {Debsources: Live and Historical Views on Macro-Level Software Evolution},
  Publisher = {ACM},
  Year = {2014},
  ISBN = {978-1-4503-2774-9},
  DOI = {10.1145/2652524.2652528},
  BookTitle = {ESEM 2014: 8th International Symposium on Empirical Software Engineering and Measurement},
  Note = {\url{http://sources.debian.net}}
}

% Corporate author: the extra braces keep the name from being split into name parts.
@Misc{ didw3c,
  Author = "{the Contributors to the Decentralized Identifiers (DIDs)}",
  Title = "Decentralized Identifiers ({DIDs}) v0.9",
  Year = "2018",
  URL = "https://w3c-ccg.github.io/did-spec/",
  Note = "Online; Draft Community Group Report 02 April 2018"
}

@Article{ dns,
  Author = {J.
Charles},
  Journal = {IEEE Software},
  Title = {Web interests tangle over {DNS} proposal},
  Year = {1997},
  Volume = {14},
  Number = {4},
  Pages = {100--105},
  Keywords = {Contracts;Domain Name System;IP networks;Job shop scheduling;Proposals;Registers;Stability;Trademarks;US Government;Web and internet services},
  DOI = {10.1109/MS.1997.595968},
  URL = {https://doi.org/10.1109/MS.1997.595968},
  ISSN = {0740-7459},
  Month = jul
}

% NOTE(review): same bibliographic data as 2008-paskin-doi; both keys are kept
% in case either one is cited elsewhere.
@Article{ doi,
  Title = {Digital object identifier ({DOI}) system},
  Author = {Paskin, Norman},
  Journal = {Encyclopedia of Library and Information Sciences},
  Volume = {3},
  Pages = {1586--1592},
  Year = {2008},
  Publisher = {Taylor \& Francis}
}

@Misc{ earkproject,
  key = {E-ARK},
  Title = {{E-ARK} (European Archival Records and Knowledge Preservation) Project},
  Year = {2014},
  HowPublished = {\url{http://www.eark-project.com/}}
}

@InProceedings{ edos2006wsl,
  Address = {Porto Alegre, Brazil},
  Author = {Boender, Jaap and Di Cosmo, Roberto and Durak, Berke and Leroy, Xavier and Mancinelli, Fabio and Morgado, Mario and Pinheiro, David and Treinen, Ralf and Trezentos, Paulo and Vouillon, J{\'e}r{\^o}me},
  BookTitle = {Proceedings of the 7th International Workshop on Free Software ({IWFS}'06)},
  Editor = {Berger, Olivier},
  Month = apr,
  x-topic = "softeng",
  Title = {News from the {EDOS} project: improving the maintenance of free software distributions},
  urllocal = {http://www.dicosmo.org/Articles/wsl06.pdf},
  URL = {http://www.dicosmo.org/Articles/wsl06.pdf},
  Year = {2006},
  dmi-category = {autc},
  Pages = {199--207},
  ISBN = {857669066-7}
}

% Accents are written as BibTeX special characters ({\"e}, {\^\i}) for sorting and labels.
@Misc{ eigenweb,
  Author = {Guennebaud, Ga{\"e}l and Jacob, Beno{\^\i}t and others},
  Title = {Eigen v3},
  HowPublished = {\url{http://eigen.tuxfamily.org}},
  Year = {2010}
}

@Article{ FLOSSmole,
  Author = {James Howison and Megan Conklin and Kevin Crowston},
  Title = {{FLOSSmole}: {A} Collaborative Repository for {FLOSS} Research Data and Analyses},
  Journal = {{IJITWE}},
  Volume = {1},
  Number = {3},
  Pages = {17--26},
  Year = {2006},
URL = {http://dx.doi.org/10.4018/jitwe.2006070102}, DOI = {10.4018/jitwe.2006070102}, timestamp = {Fri, 09 Nov 2012 14:42:08 +0100}, biburl = {http://dblp.uni-trier.de/rec/bib/journals/ijitwe/HowisonCC06}, bibsource = {dblp computer science bibliography, http://dblp.org} } @InProceedings{ gao2007archive, Title = {A research collaboratory for open source software research.}, Author = {Gao, Yongqin and VanAntwerp, Matthew and Christley, Scott and Madey, Greg}, BookTitle = {Proceedings of the First International Workshop on Emerging Trends in {FLOSS} Research and Development, {FLOSS}'07}, Year = {2007}, Publisher = {IEEE} } @Article{ force11citationprinciples, Author = {Arfon M. Smith and Daniel S. Katz and Kyle E. Niemeyer}, Institution = {FORCE11 Software Citation Working Group}, Year = 2016, Title = "Software citation principles", Journal = {PeerJ Computer Science}, Volume = {2:e86}, DOI = "10.7717/peerj-cs.86", URL = {https://doi.org/10.7717/peerj-cs.86} } @InProceedings{ GHTorrent, Author = {Georgios Gousios and Diomidis Spinellis}, Title = {GHTorrent: Github's data from a firehose}, BookTitle = {9th {IEEE} Working Conference of Mining Software Repositories, {MSR}}, Pages = {12--21}, Year = {2012}, CrossRef = {DBLP:conf/msr/2012}, URL = {http://dx.doi.org/10.1109/MSR.2012.6224294}, DOI = {10.1109/MSR.2012.6224294}, timestamp = {Wed, 13 May 2015 17:45:20 +0200}, biburl = {http://dblp.uni-trier.de/rec/bib/conf/msr/GousiosS12}, bibsource = {dblp computer science bibliography, http://dblp.org} } @Article{ gil2016, Author = {Gil, Yolanda and H. 
David, Cédric and Demir, Ibrahim and Essawy, Bakinam and Fulweiler, Wally and Goodall, Jonathan and Karlstrom, Leif and Lee, Huikyo and Mills, Heath and Oh, Ji-Hyun and Pierce, Suzanne and Pope, Allen and Tzeng, Mimi and Villamizar, Sandra and Yu, Xuan}, Year = {2016}, Month = {07}, Title = {Towards the Geoscience Paper of the Future: Best Practices for Documenting and Sharing Research from Data to Software to Provenance: Geoscience Paper of the Future}, Volume = {3}, Journal = {Earth and Space Science}, DOI = {10.1002/2015EA000136} } @Misc{ git, Author = {Git community}, Title = "Git version control system", Year = "2005", URL = "https://git-scm.com/", Note = "retrieved 09 April 2018" } @Misc{ gitorious-shutdown, Author = {GitLab}, Title = {GitLab acquires Gitorious to bolster its on premises code collaboration platform}, Year = {2015}, HowPublished = {\url{https://about.gitlab.com/2015/03/03/gitlab-acquires-gitorious/}} } @Misc{ gnu:gpl2, Author = {GNU}, Title = "GNU General Public License, version 2", Year = "1991", URL = "https://www.gnu.org/licenses/old-licenses/gpl-2.0.en.html", Note = "retrieved September 2015" } @Misc{ google-code-shutdown, Author = {Google Project Hosting}, Title = {Bidding farewell to Google Code}, Year = {2015}, HowPublished = {\url{https://opensource.googleblog.com/2015/03/farewell-to-google-code.html}} } @Misc{ GPLCompliance, Title = "A Practical Guide to {GPL} Compliance", Author = "Bradley M. Kuhn and Aaron Williamson and Karen M. 
Sandler",
  Month = aug,
  Year = 2008,
  HowPublished = {\url{https://www.softwarefreedom.org/resources/2008/compliance-guide.html}}
}

% Two technical reports (Type = Technical Report) hosted on hal.inria.fr.
@TechReport{ halswdepositguidelines,
  Author = {Gruenpeter, Morane and Sadowska, Jozefina},
  Title = {{Create software deposit}},
  Type = {Technical Report},
  Institution = {{Inria ; CCSD ; Software Heritage}},
  Year = {2018},
  URL = {https://hal.inria.fr/hal-01872189},
  Keywords = {Reproductibility ; Source code ; Open / free software ; pr{\'e}servation du patrimoine logiciel ; D{\'e}p{\^o}t ; Guide ; Logiciel open source ; code source ; Archivage ; reproductibilit{\'e} num{\'e}rique},
  hal_id = {hal-01872189},
  hal_version = {v1}
}

@TechReport{ halswmoderationguidelines,
  Author = {Gruenpeter, Morane and Sadowska, Jozefina},
  Title = {{La mod{\'e}ration d'un d{\'e}p{\^o}t logiciel}},
  Type = {Technical Report},
  Institution = {{Inria ; CCSD ; Software Heritage}},
  Year = {2018},
  URL = {https://hal.inria.fr/hal-01876705},
  Keywords = {code source ; pr{\'e}servation du patrimoine logiciel ; reproductibilit{\'e} num{\'e}rique ; Archivage ; Guide ; D{\'e}p{\^o}t ; Mod{\'e}ration ; Logiciel ; Logiciel open source},
  hal_id = {hal-01876705},
  hal_version = {v1}
}

@InProceedings{ hassan2008road,
  Title = {The road ahead for mining software repositories},
  Author = {Hassan, Ahmed E},
  BookTitle = {Frontiers of Software Maintenance, 2008.
FoSM 2008.},
  Pages = {48--57},
  Year = {2008},
  Organization = {IEEE}
}

@Article{ Hinsen2013,
  Author = {Konrad Hinsen},
  Title = {Software Development for Reproducible Research},
  Journal = {Computing in Science and Engineering},
  Volume = {15},
  Number = {4},
  Pages = {60--63},
  Year = {2013},
  URL = {https://doi.org/10.1109/MCSE.2013.91},
  DOI = {10.1109/MCSE.2013.91},
  timestamp = {Wed, 14 Nov 2018 10:48:30 +0100},
  biburl = {https://dblp.org/rec/bib/journals/cse/Hinsen13a},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}

% The page range uses the BibTeX en-dash form (--).
@Article{ HowisonBullard2016,
  Author = {Howison, James and Bullard, Julia},
  Title = {Software in the scientific literature: Problems with seeing, finding, and using software mentioned in the biology literature},
  Journal = {Journal of the Association for Information Science and Technology},
  Volume = {67},
  Number = {9},
  Pages = {2137--2155},
  Keywords = {biology, journals, bibliographic citations},
  DOI = {10.1002/asi.23538},
  URL = {https://onlinelibrary.wiley.com/doi/abs/10.1002/asi.23538},
  EPrint = {https://onlinelibrary.wiley.com/doi/pdf/10.1002/asi.23538},
  Abstract = {Software is increasingly crucial to scholarship, yet the visibility and usefulness of software in the scientific record are in question. Just as with data, the visibility of software in publications is related to incentives to share software in reusable ways, and so promote efficient science. In this article, we examine software in publications through content analysis of a random sample of 90 biology articles. We develop a coding scheme to identify software “mentions” and classify them according to their characteristics and ability to realize the functions of citations. Overall, we find diverse and problematic practices: Only between 31\% and 43\% of mentions involve formal citations; informal mentions are very common, even in high impact factor journals and across different kinds of software.
Software is frequently inaccessible (15\%–29\% of packages in any form; between 90\% and 98\% of specific versions; only between 24\%–40\% provide source code). Cites to publications are particularly poor at providing version information, whereas informal mentions are particularly poor at providing crediting information. We provide recommendations to improve the practice of software citation, highlighting recent nascent efforts. Software plays an increasingly great role in scientific practice; it deserves a clear and useful place in scholarly communication.},
  Year = {2016}
}

% No author is given, so a key field supplies the sort/label fallback (as earkproject does).
% The access date is stored in Note, matching the isbn entry.
@Misc{ hugo,
  key = {HUGO},
  Title = {The {HUGO} Gene Nomenclature Committee},
  URL = {https://www.genenames.org/},
  Note = "[Online; accessed March 1st 2018]"
}

@Article{ Hwang2017,
  Author = {Hwang, Lorraine and Fish, Allison and Soito, Laura and Smith, MacKenzie and Kellogg, Louise H.},
  Title = {Software and the Scientist: Coding and Citation Practices in Geodynamics},
  Journal = {Earth and Space Science},
  Volume = {4},
  Number = {11},
  Pages = {670--680},
  Keywords = {attribution, software citation, software best practices, Computational Infrastructure for Geodynamics, software credit, geodynamics},
  DOI = {10.1002/2016EA000225},
  URL = {https://agupubs.onlinelibrary.wiley.com/doi/abs/10.1002/2016EA000225},
  EPrint = {https://agupubs.onlinelibrary.wiley.com/doi/pdf/10.1002/2016EA000225},
  Abstract = {In geodynamics as in other scientific areas, computation has become a core component of research, complementing field observation, laboratory analysis, experiment, and theory. Computational tools for data analysis, mapping, visualization, modeling, and simulation are essential for all aspects of the scientific workflow. Specialized scientific software is often developed by geodynamicists for their own use, and this effort represents a distinctive intellectual contribution.
Drawing on a geodynamics community that focuses on developing and disseminating scientific software, we assess the current practices of software development and attribution, as well as attitudes about the need and best practices for software citation. We analyzed publications by participants in the Computational Infrastructure for Geodynamics and conducted mixed method surveys of the solid earth geophysics community. From this we learned that coding skills are typically learned informally. Participants considered good code as trusted, reusable, readable, and not overly complex and considered a good coder as one that participates in the community in an open and reasonable manor contributing to both long- and short-term community projects. Participants strongly supported citing software reflected by the high rate a software package was named in the literature and the high rate of citations in the references. However, lacking are clear instructions from developers on how to cite and education of users on what to cite. In addition, citations did not always lead to discoverability of the resource. A unique identifier to the software package itself, community education, and citation tools would contribute to better attribution practices.},
  Year = {2017}
}

% Name separators are the lowercase " and " used in the rest of the file;
% accented letters are written as BibTeX special characters.
@Article{ idplos2017,
  Author = {McMurry, Julie A. and Juty, Nick and Blomberg, Niklas and Burdett, Tony and Conlin, Tom and Conte, Nathalie and Courtot, M{\'e}lanie and Deck, John and Dumontier, Michel and Fellows, Donal K. and Gonzalez-Beltran, Alejandra and Gormanns, Philipp and Grethe, Jeffrey and Hastings, Janna and H{\'e}rich{\'e}, Jean-Karim and Hermjakob, Henning and Ison, Jon C. and Jimenez, Rafael C. and Jupp, Simon and Kunze, John and Laibe, Camille and Le Nov{\`e}re, Nicolas and Malone, James and Martin, Maria Jesus and McEntyre, Johanna R. and Morris, Chris and Muilu, Juha and M{\"u}ller, Wolfgang and Rocca-Serra, Philippe and Sansone, Susanna-Assunta and Sariyar, Murat and Snoep, Jacky L.
and Soiland-Reyes, Stian and Stanford, Natalie J. and Swainston, Neil and Washington, Nicole and Williams, Alan R. and Wimalaratne, Sarala M. and Winfree, Lilly M. and Wolstencroft, Katherine and Goble, Carole and Mungall, Christopher J. and Haendel, Melissa A. and Parkinson, Helen},
  Journal = {PLOS Biology},
  Publisher = {Public Library of Science},
  Title = {Identifiers for the 21st century: How to design, provision, and reuse persistent identifiers to maximize utility and impact of life science data},
  Year = {2017},
  Month = jun,
  Volume = {15},
  URL = {https://doi.org/10.1371/journal.pbio.2001414},
  Pages = {1--18},
  Abstract = {In many disciplines, data are highly decentralized across thousands of online databases (repositories, registries, and knowledgebases). Wringing value from such databases depends on the discipline of data science and on the humble bricks and mortar that make integration possible; identifiers are a core component of this integration infrastructure. Drawing on our experience and on work by other groups, we outline 10 lessons we have learned about the identifier qualities and best practices that facilitate large-scale data integration. Specifically, we propose actions that identifier practitioners (database providers) should take in the design, provision and reuse of identifiers. We also outline the important considerations for those referencing identifiers in various circumstances, including by authors and data generators. While the importance and relevance of each lesson will vary by context, there is a need for increased awareness about how to avoid and manage common identifier problems, especially those related to persistence and web-accessibility/resolvability.
We focus strongly on web-based identifiers in the life sciences; however, the principles are broadly relevant to other disciplines.},
  Number = {6},
  DOI = {10.1371/journal.pbio.2001414}
}

% NOTE(review): Year is a range rather than a four-digit year; left unchanged
% because it changes the rendered label. Confirm which year to cite.
@Misc{ ImageJ,
  Title = {{ImageJ}},
  Author = {Rasband, Wayne},
  Address = "U.S. National Institutes of Health, Bethesda, Maryland, USA",
  Year = {1997--2011},
  URL = {http://imagej.nih.gov/ij/},
  Note = {Retrieved jan 7, 2018}
}

@InProceedings{ Inoue2017CodeClones,
  Author = {Takashi Ishio and Yusuke Sakaguchi and Kaoru Ito and Katsuro Inoue},
  Title = {Source file set search for clone-and-own reuse analysis},
  BookTitle = {Proceedings of the 14th International Conference on Mining Software Repositories, {MSR} 2017, Buenos Aires, Argentina, May 20-28, 2017},
  Pages = {257--268},
  Year = {2017},
  CrossRef = {DBLP:conf/msr/2017},
  URL = {https://doi.org/10.1109/MSR.2017.19},
  DOI = {10.1109/MSR.2017.19},
  timestamp = {Fri, 07 Jul 2017 14:06:35 +0200},
  biburl = {http://dblp.uni-trier.de/rec/bib/conf/msr/IshioSII17},
  bibsource = {dblp computer science bibliography, http://dblp.org}
}

% Corporate author: the extra braces keep the name from being split into name parts.
@Misc{ internet-id,
  Author = "{International DOI Foundation}",
  Title = "Factsheet: {DOI} System and Internet Identifier Specifications",
  Year = "2015",
  URL = {https://www.doi.org/factsheets/DOIIdentifierSpecs.html},
  Note = "Online; retrieved 09 April 2018"
}

% No author is given, so a key field supplies the sort/label fallback (as earkproject does).
@Misc{ isbn,
  key = {ISBN},
  Title = "What is an {ISBN}?",
  URL = "https://www.isbn-international.org/content/what-isbn",
  Note = "[Online; accessed February 28th 2018]"
}

@TechReport{ ISO690,
  Type = {Standard},
  key = {ISO 690:2010},
  Month = jun,
  Year = {2010},
  Title = {Information and documentation -- Guidelines for bibliographic references and citations to information resources},
  Volume = {2010},
  Address = {Geneva, CH},
  Institution = {International Organization for Standardization}
}

@InProceedings{ KatzHong2018,
  Author = {Daniel S. Katz and Neil P.
{Chue Hong}},
  Title = {Software Citation in Theory and Practice},
  BookTitle = {Mathematical Software - {ICMS} 2018 - 6th International Conference, South Bend, IN, USA, July 24-27, 2018, Proceedings},
  Pages = {289--296},
  Year = {2018},
  CrossRef = {DBLP:conf/icms/2018},
  URL = {https://doi.org/10.1007/978-3-319-96418-8\_34},
  DOI = {10.1007/978-3-319-96418-8\_34},
  timestamp = {Tue, 17 Jul 2018 12:54:15 +0200},
  biburl = {https://dblp.org/rec/bib/conf/icms/KatzH18},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}

@Misc{ keepproject,
  key = {European Union},
  Title = {KEEP: EU cooperating},
  Year = {2000},
  HowPublished = {\url{https://www.keep.eu/}}
}

% Book edition of "Literate Programming"; the 1984 journal article is KnuthLiterate84.
@Book{ knuth1992literate,
  Title = {Literate Programming},
  Author = {Knuth, Donald E.},
  ISBN = {9780937073810},
  lccn = {lc91039510},
  Series = {Center for the Study of Language and Information Publication Lecture Notes},
  URL = {https://books.google.fr/books?id=vovpQgAACAAJ},
  Year = {1992},
  Publisher = {Cambridge University Press}
}

@Article{ KnuthLiterate84,
  Author = {Donald E. Knuth},
  Title = {Literate Programming},
  Journal = {Comput. J.},
  Volume = {27},
  Number = {2},
  Pages = {97--111},
  Year = {1984},
  URL = {http://dx.doi.org/10.1093/comjnl/27.2.97},
  DOI = {10.1093/comjnl/27.2.97},
  timestamp = {Tue, 28 Jun 2011 15:15:31 +0200},
  biburl = {http://dblp.uni-trier.de/rec/bib/journals/cj/Knuth84},
  bibsource = {dblp computer science bibliography, http://dblp.org},
  citation = "Let us change our traditional attitude to the construction of programs: Instead of imagining that our main task is to instruct a computer what to do, let us concentrate rather on explaining to human beings what we want a computer to do.
(page 99)"
}

@Article{ kranich2008information,
  Title = {Information Commons},
  Author = {Kranich, Nancy and Schement, Jorge Reina},
  Journal = {Annual Review of Information Science and Technology},
  Volume = {42},
  Number = {1},
  Pages = {546--591},
  Year = {2008},
  Publisher = {Wiley}
}

@Book{ lessig1999code,
  Title = {Code and other laws of cyberspace},
  Author = {Lessig, Lawrence},
  Year = {1999},
  Publisher = {Basic books}
}

% IEEE Xplore export: empty Volume/Number/ISSN placeholders dropped,
% month given as the standard macro, page range with an en dash.
@InProceedings{ lima-assess-2015,
  Author = {J. {Lima} and C. {Treude} and F. F. {Filho} and U. {Kulesza}},
  BookTitle = {2015 IEEE International Conference on Software Maintenance and Evolution (ICSME)},
  Title = {Assessing developer contribution with repository mining-based metrics},
  Year = {2015},
  Pages = {536--540},
  Keywords = {data mining;program debugging;software metrics;repository mining-based metrics;software companies;software development;code complexity metrics;code contribution metrics;mining software repository;Measurement;Complexity theory;Computer bugs;Software;Interviews;Productivity;Encoding;Project management;software contribution metrics;mining software repositories},
  DOI = {10.1109/ICSM.2015.7332509},
  Month = sep
}

% The first two authors were fused into a single "Last, First" name; each
% author is now its own "Last, First" item. "Technical report" is supplied by
% the entry type, so it is not repeated in Institution.
@TechReport{ mahadev2015olive,
  Title = {One-Click Time Travel},
  Author = {Satyanarayanan, Mahadev and St Clair, Gloriana and Gilbert, Benjamin and Abe, Yoshihisa and Harkes, Jan and Ryan, Dan and Linke, Erika and Webster, Keith},
  Year = {2015},
  Institution = {Computer Science, Carnegie Mellon University}
}

@Article{ matthews2010framework,
  Title = {A framework for software preservation},
  Author = {Matthews, Brian and Shaon, Arif and Bicarregui, Juan and Jones, Catherine},
  Journal = {International Journal of Digital Curation},
  Volume = {5},
  Number = {1},
  Pages = {91--105},
  Year = {2010}
}

@InProceedings{ Merkle,
  Author = {Ralph C.
Merkle},
  Title = {A Digital Signature Based on a Conventional Encryption Function},
  BookTitle = {Advances in Cryptology - {CRYPTO} '87, {A} Conference on the Theory and Applications of Cryptographic Techniques, Santa Barbara, California, USA, August 16-20, 1987, Proceedings},
  Pages = {369--378},
  Year = {1987},
  CrossRef = {DBLP:conf/crypto/1987},
  URL = {https://doi.org/10.1007/3-540-48184-2_32},
  DOI = {10.1007/3-540-48184-2_32},
  timestamp = {Fri, 19 May 2017 13:10:47 +0200},
  biburl = {https://dblp.org/rec/bib/conf/crypto/Merkle87},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}

% Cross-reference parent of Merkle; classic BibTeX needs it after the child.
@Proceedings{ DBLP:conf/crypto/1987,
  Editor = {Carl Pomerance},
  Title = {Advances in Cryptology - {CRYPTO} '87, {A} Conference on the Theory and Applications of Cryptographic Techniques, Santa Barbara, California, USA, August 16-20, 1987, Proceedings},
  Series = {Lecture Notes in Computer Science},
  Volume = {293},
  Publisher = {Springer},
  Year = {1988},
  URL = {https://doi.org/10.1007/3-540-48184-2},
  DOI = {10.1007/3-540-48184-2},
  ISBN = {3-540-18796-0},
  timestamp = {Tue, 14 May 2019 10:00:48 +0200},
  biburl = {https://dblp.org/rec/bib/conf/crypto/1987},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}

@Book{ Mysql5,
  Author = {Michael Kofler},
  Title = {The Definitive Guide to MySQL5},
  Publisher = {Apress},
  Year = {2005}
}

% Same article as VanNoorden2014 (same DOI); volume and issue taken from that
% entry. The month is the plain macro: "oct # 4" concatenated to a malformed
% month string.
@Article{ NatureTop,
  Author = {Van Noorden, Richard and Brendan Maher and Regina Nuzzo},
  Title = {The top 100 papers},
  Journal = {Nature},
  Year = 2014,
  Volume = {514},
  Number = {7524},
  Pages = {550--553},
  Month = oct,
  DOI = "10.1038/514550a",
  URL = {http://doi.org/10.1038/514550a}
}

@Unpublished{ nsrl2006,
  Author = {Steve Mead},
  Title = {Unique File Identification in the National Software Reference Library},
  Note = {National Institute of Standards and Technology},
  Year = {2006},
  URL = {https://www.nist.gov/sites/default/files/draft-060530.pdf}
}

@Article{ NumbersGame2007,
  Author = {Parnas, David Lorge},
  Title = {Stop the Numbers Game},
  Journal = {Communications of
the {ACM}},
  issue_date = {November 2007},
  Volume = {50},
  Number = {11},
  Month = nov,
  Year = {2007},
  ISSN = {0001-0782},
  Pages = {19--21},
  numpages = {3},
  URL = {http://doi.acm.org/10.1145/1297797.1297815},
  DOI = {10.1145/1297797.1297815},
  acmid = {1297815},
  Publisher = {ACM},
  Address = {New York, NY, USA}
}

% No personal author: the Organization field identifies the issuing body, so
% the empty Author placeholder from the export is dropped.
@Manual{ OAIS2002,
  added-at = {2008-09-12T12:15:29.000+0200},
  biburl = {https://www.bibsonomy.org/bibtex/29a3071d3a754f11b547d8daa8d439fa1/hansgeorgbecker},
  interhash = {d81d1990c6a25e93776de70fd39e8237},
  intrahash = {9a3071d3a754f11b547d8daa8d439fa1},
  Keywords = {OAIS Referenzmodell reference.model},
  Month = jan,
  Organization = {CCSDS - Consultative Committee for Space Data Systems},
  timestamp = {2008-09-12T12:15:29.000+0200},
  Title = {Reference Model for an Open Archival Information System (OAIS), Blue Book, Issue 1},
  URL = {http://public.ccsds.org/publications/archive/650x0b1.pdf},
  Year = 2002
}

@Article{ open-science-2018-review,
  Title = {{O}pen {S}cience now: A systematic literature review for an integrated definition},
  Author = {Vicente-S{\'a}ez, Rub{\'e}n and Mart{\'\i}nez-Fuentes, Clara},
  Journal = {Journal of Business Research},
  Volume = {88},
  Pages = {428--436},
  Year = {2018},
  Publisher = {Elsevier},
  URL = {https://doi.org/10.1016/j.jbusres.2017.12.043}
}

@Article{ osullivan2009making,
  Title = {Making sense of revision-control systems},
  Author = {O'Sullivan, Bryan},
  Journal = {Communications of the {ACM}},
  Volume = {52},
  Number = {9},
  Pages = {56--62},
  Year = {2009},
  Publisher = {ACM}
}

% The DOI field holds the bare identifier; the resolver form stays in URL and ee.
@Article{ Parmap2012,
  Author = {Marco Danelutto and Di Cosmo, Roberto},
  Title = {A ``{M}inimal {D}isruption'' Skeleton Experiment: Seamless Map {\&} Reduce Embedding in {OC}aml},
  Journal = {Procedia CS},
  Volume = {9},
  Year = {2012},
  Pages = {1837--1846},
  ee = {http://dx.doi.org/10.1016/j.procs.2012.04.202},
  DOI = {10.1016/j.procs.2012.04.202},
  URL = {http://dx.doi.org/10.1016/j.procs.2012.04.202},
  urllocal =
{http://www.dicosmo.org/Articles/2012-DaneluttoDiCosmo-Pcs.pdf},
  dmi-category = {intc},
  x-topic = "parallel",
  bibsource = {DBLP, http://dblp.uni-trier.de}
}

@Article{ paskin2005digital,
  Title = {Digital object identifiers for scientific data},
  Author = {Paskin, Norman},
  Journal = {Data Science Journal},
  Volume = {4},
  Pages = {12--20},
  Year = {2005},
  Publisher = {CODATA}
}

% The braces around the acronym keep sentence-casing styles from lowercasing it.
@Article{ paskin2010digital,
  Title = {Digital object identifier ({DOI}) system},
  Author = {Paskin, Norman},
  Journal = {Encyclopedia of library and information sciences},
  Volume = {3},
  Pages = {1586--1592},
  Year = {2010},
  Publisher = {Taylor \& Francis England}
}

@Article{ Peng2015,
  Author = {Peng, Roger},
  Title = {The reproducibility crisis in science: A statistical counterattack},
  Journal = {Significance},
  Volume = {12},
  Number = {3},
  Pages = {30--32},
  DOI = {10.1111/j.1740-9713.2015.00827.x},
  URL = {https://rss.onlinelibrary.wiley.com/doi/abs/10.1111/j.1740-9713.2015.00827.x},
  EPrint = {https://rss.onlinelibrary.wiley.com/doi/pdf/10.1111/j.1740-9713.2015.00827.x},
  Abstract = {More people have more access to data than ever before. But a comparative lack of analytical skills has resulted in scientific findings that are neither replicable nor reproducible. It is time to invest in statistics education, says Roger Peng},
  Year = {2015}
}

% Collective author: the inner braces stop BibTeX from parsing
% "contributors" as a surname and "Wikipedia" as a given name.
@Misc{ plan9,
  Author = {{Wikipedia contributors}},
  Title = "Plan 9 from Bell Labs --- Wikipedia{,} The Free Encyclopedia",
  Year = "2018",
  URL = "https://en.wikipedia.org/w/index.php?title=Plan_9_from_Bell_Labs&oldid=832417303",
  Note = "retrieved 09 April 2018"
}

@Book{ Popper34,
  Author = {Karl R. Popper},
  Title = {Logik der Forschung},
  Publisher = {J.C.B.
Mohr},
  Year = {1971},
  ISBN = {316148410X},
  Note = {Reprint of the original 1934 edition}
}

% Accents are written as brace-wrapped special characters so that sorting and
% case changes treat each accented letter as one character.
@Article{ PourLaScience2013,
  Author = {Di Cosmo, Roberto},
  Title = {Organiser le partage pour pr{\'e}server les donn{\'e}es},
  Journal = {Pour la {S}cience},
  Pages = {86--90},
  Year = {2013},
  Volume = 433,
  dmi-category = {vulg},
  Month = nov,
  urllocal = {http://www.dicosmo.org/Articles/2013-11-PourLaScience-partage_donnees.pdf},
  URL = {http://www.pourlascience.fr/ewb_pages/a/article-32240-organiser-le-partage-pour-preserver-les-donnees.php}
}

@Book{ ProGit2014,
  Author = {Chacon, Scott and Straub, Ben},
  Title = {Pro Git},
  Year = {2014},
  ISBN = {1484200772, 9781484200773},
  Edition = {Second},
  Publisher = {Apress},
  Address = {Berkeley, CA, USA}
}

@Book{ raymond2003art,
  Title = {The art of Unix programming},
  Author = {Raymond, Eric S},
  Year = {2003},
  Publisher = {Addison-Wesley Professional}
}

@TechReport{ RFC3650,
  Author = {S. Sun and L. Lannom and B. Boesch},
  Title = {Handle System Overview},
  HowPublished = {Internet Requests for Comments},
  Type = {RFC},
  Number = {3650},
  Year = {2003},
  Month = nov,
  ISSN = {2070-1721},
  Publisher = {RFC Editor},
  Institution = {RFC Editor}
}

% Authors must be separated by " and "; commas are reserved for the
% "Last, First" form and made BibTeX report too many commas in a name.
@TechReport{ rfc3986,
  Author = {T. Berners-Lee and R. Fielding and L.
Masinter},
  Title = {Uniform Resource Identifier ({URI}): Generic Syntax},
  HowPublished = {The Internet Society},
  Type = {RFC},
  Number = {3986},
  Year = {2005},
  Month = jan,
  Publisher = {RFC Editor},
  Institution = {RFC Editor},
  URL = {https://tools.ietf.org/html/rfc3986}
}

@Article{ RiehleFLOSSFoundations2010,
  Author = {Dirk Riehle},
  Title = {The Economic Case for Open Source Foundations},
  Journal = {{IEEE} Computer},
  Volume = {43},
  Number = {1},
  Pages = {86--90},
  Year = {2010},
  URL = {https://doi.org/10.1109/MC.2010.24},
  DOI = {10.1109/MC.2010.24},
  timestamp = {Mon, 05 Jun 2017 20:51:35 +0200},
  biburl = {http://dblp.uni-trier.de/rec/bib/journals/computer/Riehle10},
  bibsource = {dblp computer science bibliography, http://dblp.org}
}

@Misc{ roadsandbridges,
  Author = "Nadia Eghbal",
  Year = 2016,
  Title = "Roads and Bridges: The Unseen Labor Behind Our Digital Infrastructure.",
  HowPublished = "Report from the Ford Foundation"
}

@Article{ rosenthal17,
  Author = {David S. H. Rosenthal},
  Title = {The medium-term prospects for long-term storage systems},
  Journal = {Library Hi Tech},
  Volume = {35},
  Number = {1},
  Pages = {11--31},
  Year = {2017},
  URL = {https://doi.org/10.1108/LHT-11-2016-0128},
  DOI = {10.1108/LHT-11-2016-0128},
  timestamp = {Sun, 28 May 2017 13:20:10 +0200},
  biburl = {https://dblp.org/rec/bib/journals/lht/Rosenthal17},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}

% The address is wrapped in \url so the long URL can break across lines; the
% conference days are prepended to the month macro by concatenation.
@Misc{ RosenthalEUDAT,
  Author = {David Rosenthal and Rob Baxter and Laurence Field},
  Title = {Towards a shared vision of sustainability for research and e-infrastructures},
  HowPublished = {\url{https://www.eudat.eu/news/towards-shared-vision-sustainability-research-and-e-infrastructures}},
  Month = "24--25 " # sep,
  Year = 2014,
  Note = {EUDAT conference}
}

@Book{ schweik2012internet,
  Title = {Internet success: a study of open-source software commons},
  Author = {Schweik, Charles M and English, Robert C},
  Year = {2012},
  Publisher = {MIT Press}
}

@Article{ seminalhgnc78,
  Author = {Shows,
T. B. and McAlpine, P. J.},
  Title = {The catalog of human genes and chromosome assignments},
  Journal = {Cytogenetic and Genome Research},
  Year = 1978,
  Volume = 22,
  Number = {1-6},
  Pages = {132--145},
  DOI = {10.1159/000130925},
  URL = {https://www.karger.com/DOI/10.1159/000130925}
}

@Book{ SemiPar2003,
  Author = {David Ruppert},
  Title = {Semiparametric Regression (Cambridge Series in Statistical and Probabilistic Mathematics)},
  Publisher = {Cambridge University Press},
  Year = {2003},
  ISBN = {9780521785167}
}

@InCollection{ sep-names,
  Author = {Cumming, Sam},
  Title = {Names},
  BookTitle = {The Stanford Encyclopedia of Philosophy},
  Editor = {Edward N. Zalta},
  HowPublished = {\url{https://plato.stanford.edu/archives/fall2016/entries/names/}},
  Year = {2016},
  Edition = {Fall 2016},
  Publisher = {Metaphysics Research Lab, Stanford University}
}

@InCollection{ sep-peirce-semiotics,
  Author = {Atkin, Albert},
  Title = {Peirce's Theory of Signs},
  BookTitle = {The Stanford Encyclopedia of Philosophy},
  Editor = {Edward N. Zalta},
  HowPublished = {\url{https://plato.stanford.edu/archives/sum2013/entries/peirce-semiotics/}},
  Year = {2013},
  Edition = {Summer 2013},
  Publisher = {Metaphysics Research Lab, Stanford University}
}

% The DOI is the bare identifier taken from the resolver link in the ee field.
@Article{ SHA1,
  added-at = {2013-01-27T00:00:00.000+0100},
  Author = {Dang, Quynh},
  biburl = {http://www.bibsonomy.org/bibtex/2639634f48d1781b5312fdc8f19cf5c43/dblp},
  ee = {http://dx.doi.org/10.1080/01611194.2012.687431},
  DOI = {10.1080/01611194.2012.687431},
  interhash = {aea496907c0a161be8390d0e1b69474f},
  intrahash = {639634f48d1781b5312fdc8f19cf5c43},
  Journal = {Cryptologia},
  Keywords = {dblp},
  Number = 1,
  Pages = {69--73},
  timestamp = {2013-01-29T11:36:44.000+0100},
  Title = {Changes in Federal Information Processing Standard (FIPS) 180-4, Secure Hash Standard.},
  Volume = 37,
  Year = 2013
}

@Article{ Shustek06,
  Author = {Leonard J.
Shustek},
  Title = {What Should We Collect to Preserve the History of Software?},
  Journal = {{IEEE} Annals of the History of Computing},
  Volume = {28},
  Number = {4},
  Pages = {110--112},
  Year = {2006},
  URL = {http://dx.doi.org/10.1109/MAHC.2006.78},
  DOI = {10.1109/MAHC.2006.78},
  timestamp = {Tue, 05 Jan 2016 15:36:44 +0100},
  biburl = {http://dblp2.uni-trier.de/rec/bib/journals/annals/Shustek06},
  bibsource = {dblp computer science bibliography, http://dblp.org}
}

% IEEE Xplore export: month given as the standard macro, page range with an en dash.
@Article{ SoftwareAnalytics2013,
  Author = {Menzies, Tim and Zimmermann, Thomas},
  Journal = {IEEE Software},
  Title = {Software Analytics: So What?},
  Year = {2013},
  Month = jul,
  Volume = {30},
  Number = {4},
  Pages = {31--37},
  Abstract = {The guest editors of this special issue of IEEE Software invited submissions that reflected the benefits (and drawbacks) of software analytics, an area of explosive growth. They had so many excellent submissions that they had to split this special issue into two volumes--you'll see even more content in the September/October issue. They divided the articles on conceptual grounds, so both volumes will feature equally excellent work. The Web extra at http://youtu.be/nO6X0azR0nw is a video interview in which IEEE Software editor in chief Forrest Shull speaks with Tim Menzies about the growing importance of software analytics.},
  Keywords = {program diagnostics;software engineering;IEEE Software;explosive software growth;software analytics;Data analysis;Data models;Decision making;Software algorithms;Software development;Software engineering;Special issues and sections;analysis;big data;measurement;metrics;software analytics},
  DOI = {10.1109/MS.2013.86},
  ISSN = {0740-7459}
}

@Article{ SoftwareCitationPrinciples,
  Author = {Smith, Arfon M. and Katz, Daniel S.
and Niemeyer, Kyle E.},
  Title = {Software citation principles},
  Journal = {PeerJ Computer Science},
  Volume = {2},
  Pages = {e86},
  Year = {2016},
  URL = {https://doi.org/10.7717/peerj-cs.86},
  DOI = {10.7717/peerj-cs.86},
  timestamp = {Thu, 08 Jun 2017 09:07:38 +0200},
  biburl = {http://dblp.uni-trier.de/rec/bib/journals/peerj-cs/SmithKN16},
  bibsource = {dblp computer science bibliography, http://dblp.org}
}

% NOTE(review): same work (same DOI) as SoftwareCitationPrinciples; presumably
% both keys are cited somewhere, so both are kept. Citing the two in one
% document would print the reference twice.
@Article{ SoftwareCitationPrinciples-2016,
  Author = {Arfon M. Smith and Daniel S. Katz and Kyle E. Niemeyer},
  Title = {Software citation principles},
  Journal = {PeerJ Computer Science},
  Volume = {2},
  Pages = {e86},
  Year = {2016},
  URL = {https://doi.org/10.7717/peerj-cs.86},
  DOI = {10.7717/peerj-cs.86},
  timestamp = {Wed, 14 Nov 2018 10:22:40 +0100},
  biburl = {https://dblp.org/rec/bib/journals/peerj-cs/SmithKN16},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}

% The second author was entered as "J Hwang, Lorraine", which BibTeX reads as
% the surname "J Hwang"; the middle initial now sits with the given name.
@Article{ Soito2016,
  Author = {Soito, Laura and Hwang, Lorraine J.},
  Title = "Citations for Software: Providing Identification, Access and Recognition for Research Software",
  Journal = "International Journal of Digital Curation",
  Volume = 11,
  Number = 2,
  Year = 2016,
  DOI = {10.2218/ijdc.v11i2.390},
  URL = "http://www.ijdc.net/index.php/ijdc/article/view/11.2.48/451"
}

@Article{ SpinellisUnix2017,
  Author = {Diomidis Spinellis},
  Title = {A repository of Unix history and evolution},
  Journal = {Empirical Software Engineering},
  Volume = {22},
  Number = {3},
  Pages = {1372--1404},
  Year = {2017},
  URL = {https://doi.org/10.1007/s10664-016-9445-5},
  DOI = {10.1007/s10664-016-9445-5},
  timestamp = {Thu, 01 Jun 2017 18:57:39 +0200},
  biburl = {http://dblp.uni-trier.de/rec/bib/journals/ese/Spinellis17},
  bibsource = {dblp computer science bibliography, http://dblp.org}
}

@InProceedings{ squire2012describing,
  Title = {Describing the software forge ecosystem},
  Author = {Squire, Megan and Williams, David},
  BookTitle = {System Science (HICSS), 2012 45th Hawaii International Conference on},
  Pages = {3416--3425},
Year = {2012},
  Organization = {IEEE}
}

% Web guideline without a publication year; the Note records the access date.
@Unpublished{ SSIguidelines,
  Author = {Mike Jackson},
  Title = {How to cite and describe software},
  Note = {Accessed on December 31st 2018},
  URL = {https://www.software.ac.uk/how-cite-software}
}

% DBLP export.
@Article{ stanisicld15,
  Author = {Luka Stanisic and Arnaud Legrand and Vincent Danjean},
  Title = {An Effective Git And Org-Mode Based Workflow For Reproducible Research},
  Journal = {Operating Systems Review},
  Volume = {49},
  Number = {1},
  Pages = {61--70},
  Year = {2015},
  URL = {https://doi.org/10.1145/2723872.2723881},
  DOI = {10.1145/2723872.2723881},
  timestamp = {Tue, 06 Nov 2018 12:51:34 +0100},
  biburl = {https://dblp.org/rec/bib/journals/sigops/StanisicLD15},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}

% DBLP export.
@Article{ Stodden-reprod-2012,
  Author = {Victoria Stodden and Randall J. LeVeque and Ian Mitchell},
  Title = {Reproducible Research for Scientific Computing: Tools and Strategies for Changing the Culture},
  Journal = {Computing in Science and Engineering},
  Volume = {14},
  Number = {4},
  Pages = {13--17},
  Year = {2012},
  URL = {https://doi.org/10.1109/MCSE.2012.38},
  DOI = {10.1109/MCSE.2012.38},
  timestamp = {Thu, 15 Jun 2017 21:31:58 +0200},
  biburl = {https://dblp.org/rec/bib/journals/cse/StoddenLM12},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}

@Misc{ swh-hal-deposit-2018,
  Author = "Yannick Barborini and Di Cosmo, Roberto and Dumont, Antoine R.
and Morane Gruenpeter and Bruno Marmol and Alain Monteil and Jozefina Sadowska and Stefano Zacchiroli",
  Title = "The creation of a new type of scientific deposit: Software",
  BookTitle = "RDA Eleventh Plenary Meeting",
  Year = 2018,
  HowPublished = {https://www.rd-alliance.org/rda-11th-plenary-poster-session},
  halid = {hal-01738741}
}

% The cedilla is written as a brace-wrapped special character so that sorting
% and label generation treat it as a single letter.
@Article{ swhcacm2018,
  Author = {Abramatic, Jean-Fran{\c{c}}ois and Di Cosmo, Roberto and Zacchiroli, Stefano},
  Title = {Building the Universal Archive of Source Code},
  Journal = {Communications of the {ACM}},
  issue_date = {October 2018},
  Volume = {61},
  Number = {10},
  Month = sep,
  Year = {2018},
  ISSN = {0001-0782},
  Pages = {29--31},
  numpages = {3},
  URL = {http://doi.acm.org/10.1145/3183558},
  DOI = {10.1145/3183558},
  acmid = {3183558},
  Publisher = {ACM},
  Address = {New York, NY, USA}
}

@InProceedings{ swhipres2017,
  Author = "Di Cosmo, Roberto and Stefano Zacchiroli",
  Title = "{Software Heritage}: Why and How to Preserve Software Source Code",
  BookTitle = {Proceedings of the 14th International Conference on Digital Preservation, iPRES 2017},
  Month = sep,
  Year = {2017},
  URL = {https://hal.archives-ouvertes.fr/hal-01590958/}
}

@InProceedings{ swhipres2018,
  Author = "Di Cosmo, Roberto and Morane Gruenpeter and Stefano Zacchiroli",
  Title = "Identifiers for Digital Objects: the Case of Software Source Code Preservation",
  BookTitle = {Proceedings of the 15th International Conference on Digital Preservation, iPRES 2018, Boston, USA},
  Month = sep,
  Year = {2018},
  URL = {https://hal.archives-ouvertes.fr/hal-01865790},
  urllocal = {http://www.dicosmo.org/Articles/iPres2018.pdf},
  dmi-category = {intc},
  x-topic = "preservation",
  DOI = "10.17605/OSF.IO/KDE56"
}

@InProceedings{ swh-msr2019-dataset,
  Author = {Antoine Pietri and Diomidis Spinellis and Stefano Zacchiroli},
  Title = {The {S}oftware {H}eritage graph dataset: public software development under one roof},
  BookTitle = {Proceedings of the 16th International Conference on Mining Software Repositories,
{MSR} 2019, 26-27 May 2019, Montreal, Canada.},
  Pages = {138--142},
  Year = {2019},
  CrossRef = {DBLP:conf/msr/2019},
  URL = {https://dl.acm.org/citation.cfm?id=3341907},
  timestamp = {Sun, 23 Jun 2019 16:40:54 +0200},
  biburl = {https://dblp.org/rec/bib/conf/msr/PietriSZ19},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}

% The braces around the project name keep sentence-casing styles from
% printing it as "software heritage".
@InProceedings{ msr-2020-challenge,
  Author = {Antoine Pietri and Diomidis Spinellis and Stefano Zacchiroli},
  Title = {The {Software Heritage} Graph Dataset: Large-scale Analysis of Public Software Development History},
  Abstract = {Software Heritage is the largest existing public archive of software source code and accompanying development history. It spans more than five billion unique source code files and one billion unique commits, coming from more than 80 million software projects. These software artifacts were retrieved from major collaborative development platforms (e.g., GitHub, GitLab) and package repositories (e.g., PyPI, Debian, NPM), and stored in a uniform representation linking together source code files, directories, commits, and full snapshots of version control systems (VCS) repositories as observed by Software Heritage during periodic crawls. This dataset is unique in terms of accessibility and scale, and allows to explore a number of research questions on the long tail of public software development, instead of solely focusing on "most starred" repositories as it often happens.},
  Publisher = {IEEE},
  Year = {2020},
  BookTitle = {MSR 2020: The 17th International Conference on Mining Software Repositories},
  Note = {to appear}
}

% Cross-reference parent of swh-msr2019-dataset; classic BibTeX needs it after the child.
@Proceedings{ DBLP:conf/msr/2019,
  Editor = {Margaret{-}Anne D.
Storey and Bram Adams and Sonia Haiduc},
  Title = {Proceedings of the 16th International Conference on Mining Software Repositories, {MSR} 2019, 26-27 May 2019, Montreal, Canada},
  Publisher = {{IEEE} / {ACM}},
  Year = {2019},
  URL = {https://dl.acm.org/citation.cfm?id=3341883},
  ISBN = {978-1-7281-3412-3},
  timestamp = {Sun, 23 Jun 2019 16:40:54 +0200},
  biburl = {https://dblp.org/rec/bib/conf/msr/2019},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}

% The DOI is the bare identifier taken from the resolver link in the ee field.
@Article{ syeed-2013-oss-evol-review,
  Author = {M. M. Mahbubul Syeed and Imed Hammouda and Tarja Syst{\"a}},
  Title = {Evolution of Open Source Software Projects: A Systematic Literature Review},
  Journal = {JSW},
  Volume = {8},
  Number = {11},
  Year = {2013},
  Pages = {2815--2829},
  ee = {http://dx.doi.org/10.4304/jsw.8.11.2815-2829},
  DOI = {10.4304/jsw.8.11.2815-2829},
  bibsource = {DBLP, http://dblp.uni-trier.de}
}

% The DOI field holds the bare identifier, without resolver or "doi:" prefix.
@Article{ thain2015,
  Title = {Techniques for Preserving Scientific Software Executions: Preserve the Mess or Encourage Cleanliness?},
  Author = {Douglas Thain and Peter Ivie and Haiyan Meng},
  Journal = {Proceedings of the International Conference on Digital Preservation (iPRES)},
  Year = {2015},
  DOI = {10.7274/R0CZ353M}
}

@Article{ the-real-software-crisis,
  Author = {Krishnamurthi, Shriram and Vitek, Jan},
  Title = {The Real Software Crisis: Repeatability As a Core Value},
  Journal = {Communications of the {ACM}},
  Volume = {58},
  Number = {3},
  Month = feb,
  Year = {2015},
  Pages = {34--36},
  DOI = {10.1145/2658987},
  URL = {http://doi.org/10.1145/2658987},
  Publisher = {ACM}
}

% "Titus Brown, C." was parsed as the surname "Titus Brown"; the surname is Brown.
@Misc{ titusbrown2015,
  Author = "Brown, C. Titus",
  Title = "Pubwication of software papers, and authorship on them",
  Year = "2015",
  Month = sep,
  URL = "https://ivory.idyll.org/blog/2015-authorship-on-software-papers.html",
  Note = "retrieved January 2019"
}

% The citation key keeps its historical spelling; the author's surname is Treloar.
@Article{ treolar2014,
  Title = {A perspective on Archiving the Scholarly Web},
  Author = {Van de Sompel, Herbert and Treloar, Andrew},
  Journal = {Proceedings of the International Conference on
Digital Preservation (iPRES)},
  Pages = {194--198},
  Year = {2014}
}

% No author: the key field gives styles something to sort and label by.
@Misc{ UnescoInria2017,
  key = {UNESCO},
  Title = "L'UNESCO et INRIA signent un accord sur l'archivage des logiciels en pr{\'e}sence du Pr{\'e}sident Hollande",
  HowPublished = "\url{http://fr.unesco.org/news/unesco-inria-signent-accord-archivage-logiciels-presence-du-president-hollande}",
  Year = 2017,
  Month = apr
}

@Misc{ unescopersist,
  key = {UNESCO},
  Title = {UNESCO PERSIST Programme},
  Year = {2015},
  HowPublished = {\url{https://unescopersist.org/}}
}

@TechReport{ us-fed-strategy,
  key = {US Fed},
  Title = {Strategy for American Innovation},
  Institution = {[US] Federal Register},
  Year = {2014},
  Note = {\url{https://www.federalregister.gov/articles/2014/07/29/2014-17761/strategy-for-american-innovation}}
}

% Authors must be separated by " and "; a comma-separated list is rejected by
% the BibTeX name parser.
@Misc{ uuid,
  Author = "P. Leach and M. Mealling and R. Salz",
  Title = "A Universally Unique IDentifier ({UUID}) {URN} Namespace",
  DOI = "10.17487/RFC4122",
  URL = "https://www.rfc-editor.org/info/rfc4122",
  Year = "2005"
}

% Same article as NatureTop (same DOI).
@Article{ VanNoorden2014,
  DOI = {10.1038/514550a},
  URL = {https://doi.org/10.1038/514550a},
  Year = {2014},
  Month = oct,
  Publisher = {Springer Nature},
  Volume = {514},
  Number = {7524},
  Pages = {550--553},
  Author = {Richard Van Noorden and Brendan Maher and Regina Nuzzo},
  Title = {The top 100 papers},
  Journal = {Nature}
}

% URL fields hold the raw address; styles that print the field add \url themselves.
@Misc{ web:arxiv,
  key = {arXiv},
  Title = "ArXiv: e-prints archive",
  Year = "2017",
  URL = "https://arxiv.org/",
  Note = "retrieved June 2017"
}

@Misc{ web:cines,
  key = {CINES},
  Title = {Centre Informatique National de l'Enseignement Sup{\'e}rieur},
  Year = "2014",
  URL = "https://www.cines.fr/",
  Note = "\url{https://www.cines.fr/}"
}

@Misc{ web:dp-coalition,
  key = {DPC},
  Title = {Digital Preservation Coalition},
  URL = "http://www.dpconline.org/",
  Note = "\url{http://www.dpconline.org/}"
}

@Misc{ web:githubarchive,
  key = {GitHubArchive},
  Title = {GitHub Archive},
  Year = "2017",
  HowPublished = "\url{https://www.githubarchive.org/}",
  Note = "Retrieved March 2017"
}

@Misc{ web:gmane,
  key = {Gmane},
  Title = {Gmane},
Year = "2017",
  HowPublished = "\url{http://gmane.org}"
}

% URL fields hold the raw address; styles that print the field add \url themselves.
@Misc{ web:hal,
  key = {HAL},
  Title = "HAL: Hyper articles en ligne",
  Year = "2017",
  URL = "https://hal.archives-ouvertes.fr/",
  Note = "retrieved June 2017"
}

% The access date lives in the Note; Year carries only the four-digit year.
@Misc{ web:internet-archive,
  key = {InternetArchive},
  Title = {Internet Archive: Digital Library of Free Books, Movies, Music \& Wayback Machine},
  URL = "https://archive.org",
  Year = "2017",
  Note = "\url{https://archive.org}, retrieved 3/3/2017"
}

@Misc{ web:national-sw-ref-library,
  Author = {NIST},
  Title = "{[US]} National Software Reference Library",
  Year = "2014",
  URL = "http://www.nsrl.nist.gov",
  Note = "\url{http://www.nsrl.nist.gov}"
}

% The ampersand is escaped so the field is safe if a style prints it.
@Misc{ web:national-sw-ref-library-hash,
  Author = {Steve Mead},
  Title = "Unique File Identification in the {National Software Reference Library}",
  Year = "2014",
  HowPublished = "\url{http://www.nsrl.nist.gov}",
  Institution = "National Institute of Standards \& Technology",
  Note = "smead@nist.gov"
}

@Misc{ web:openaire,
  key = {OpenAIRE},
  Title = {OpenAIRE},
  Year = "2014",
  URL = "https://www.openaire.eu/",
  Note = "\url{https://www.openaire.eu/}"
}

@Misc{ web:openedition,
  key = {OpenEdition},
  Title = "Open Edition",
  Year = "2017",
  URL = "https://www.openedition.org/",
  Note = "retrieved June 2017"
}

@Misc{ web:zenodo,
  key = {Zenodo},
  Title = {Zenodo},
  Year = "2013",
  HowPublished = "\url{https://zenodo.org/}"
}

% TeX quotation marks replace the straight double quotes, which typeset as
% two closing quotes.
@Article{ Whitt2017,
  Author = {Whitt, Richard S.},
  Title = {``Through A Glass, Darkly'' Technical, Policy, and Financial Actions to Avert the Coming Digital Dark Ages},
  Journal = {Santa Clara High Tech.
L.J.},
  Year = 2017,
  Volume = 33,
  Number = 2,
  Pages = 117,
  Month = jan,
  Note = {Available at: http://digitalcommons.law.scu.edu/chtlj/vol33/iss2/1}
}

% URL fields hold the raw address; styles that print the field add \url themselves.
@Misc{ wikipedia:cultural-heritage,
  Author = "Wikipedia",
  Title = "Cultural heritage --- {W}ikipedia{,} The Free Encyclopedia",
  Year = "2015",
  URL = "https://en.wikipedia.org/wiki/Cultural_heritage",
  Note = "retrieved September 2015"
}

@Misc{ wikipedia:notability,
  Author = "Wikipedia",
  Title = "{Wikipedia:Notability} --- {W}ikipedia{,} The Free Encyclopedia",
  Year = "2015",
  URL = "https://en.wikipedia.org/wiki/Wikipedia:Notability",
  Note = "retrieved November 2015"
}

% The author list is truncated with the literal "and others" token, which
% styles render as "et al.".
@Article{ wilkinson2016fair,
  Title = {The {FAIR} Guiding Principles for scientific data management and stewardship},
  Author = {Wilkinson, Mark D and Dumontier, Michel and Aalbersberg, IJsbrand Jan and Appleton, Gabrielle and Axton, Myles and Baak, Arie and Blomberg, Niklas and Boiten, Jan-Willem and da Silva Santos, Luiz Bonino and Bourne, Philip E and others},
  Journal = {Scientific Data},
  Volume = {3},
  Year = {2016},
  Publisher = {Nature Publishing Group}
}

@Article{ Wimalaratne2018,
  Author = {Wimalaratne, Sarala M. and Juty, Nick and Kunze, John and Jan{\'e}e, Greg and McMurry, Julie A. and Beard, Niall and Jimenez, Rafael and Grethe, Jeffrey S. and Hermjakob, Henning and Martone, Maryann E.
and Clark, Tim},
  Title = {Uniform resolution of compact identifiers for biomedical data},
  Journal = {Scientific Data},
  Year = 2018,
  Month = may,
  day = 08,
  Publisher = {Nature},
  Volume = 5,
  Pages = {180029},
  Note = {Article},
  Abstract = {We report here on our project to provide robust support for machine-resolvable, persistent compact identifiers in biomedical data citation, by harmonizing the Identifiers.org and N2T.net (Name-To-Thing) meta-resolvers and extending their capabilities.},
  URL = {https://doi.org/10.1038/sdata.2018.29}
}

@Proceedings{ 2013-preserving-exe,
  Editor = {Trevor Owens},
  Title = {Preserving.exe Report: Toward a [US] National Strategy for Preserving Software},
  Year = {2013},
  Month = jul,
  Note = {\url{http://blogs.loc.gov/digitalpreservation/2013/10/preserving-exe-report-toward-a-national-strategy-for-preserving-software/}}
}

@Proceedings{ 2014-digital-preservation,
  Editor = {Erin Engle},
  Title = {Digital Preservation 2014: annual meeting of the [US] National Digital Information Infrastructure and Preservation Program and the [US] National Digital Stewardship Alliance},
  Year = {2014},
  Month = jul,
  Note = {\url{http://www.digitalpreservation.gov/meetings/ndiipp14.html}}
}

% Cross-reference parent of KatzHong2018; classic BibTeX needs it after the child.
@Proceedings{ DBLP:conf/icms/2018,
  Editor = {James H. Davenport and Manuel Kauers and George Labahn and Josef Urban},
  Title = {Mathematical Software - {ICMS} 2018 - 6th International Conference, South Bend, IN, USA, July 24-27, 2018, Proceedings},
  Series = {Lecture Notes in Computer Science},
  Volume = {10931},
  Publisher = {Springer},
  Year = {2018},
  URL = {https://doi.org/10.1007/978-3-319-96418-8},
  DOI = {10.1007/978-3-319-96418-8},
  ISBN = {978-3-319-96417-1},
  timestamp = {Tue, 17 Jul 2018 12:46:33 +0200},
  biburl = {https://dblp.org/rec/bib/conf/icms/2018},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}

@Proceedings{ DBLP:conf/icse/2016c,
  Editor = {Laura K.
Dillon and Willem Visser and Laurie Williams},
  Title = {Proceedings of the 38th International Conference on Software Engineering, {ICSE} 2016, Austin, TX, USA, May 14-22, 2016 - Companion Volume},
  Publisher = {{ACM}},
  Year = {2016},
  URL = {http://dl.acm.org/citation.cfm?id=2889160},
  ISBN = {978-1-4503-4205-6},
  timestamp = {Fri, 24 Mar 2017 09:33:29 +0100},
  biburl = {http://dblp.uni-trier.de/rec/bib/conf/icse/2016c},
  bibsource = {dblp computer science bibliography, http://dblp.org}
}

% DBLP proceedings record for MSR 2012.
@Proceedings{ DBLP:conf/msr/2012,
  Editor = {Michele Lanza and Massimiliano Di Penta and Tao Xie},
  Title = {9th {IEEE} Working Conference of Mining Software Repositories, {MSR} 2012, June 2-3, 2012, Zurich, Switzerland},
  Publisher = {{IEEE} Computer Society},
  Year = {2012},
  URL = {http://ieeexplore.ieee.org/xpl/mostRecentIssue.jsp?punumber=6220358},
  ISBN = {978-1-4673-1761-0},
  timestamp = {Wed, 13 May 2015 17:45:20 +0200},
  biburl = {http://dblp.uni-trier.de/rec/bib/conf/msr/2012},
  bibsource = {dblp computer science bibliography, http://dblp.org}
}

% Cross-reference parent of Inoue2017CodeClones; classic BibTeX needs it after the child.
@Proceedings{ DBLP:conf/msr/2017,
  Editor = {Jes{\'{u}}s M.
Gonz{\'{a}}lez{-}Barahona and Abram Hindle and Lin Tan},
  Title = {Proceedings of the 14th International Conference on Mining Software Repositories, {MSR} 2017, Buenos Aires, Argentina, May 20-28, 2017},
  Publisher = {{IEEE} Computer Society},
  Year = {2017},
  URL = {http://ieeexplore.ieee.org/xpl/mostRecentIssue.jsp?punumber=7959735},
  ISBN = {978-1-5386-1544-7},
  timestamp = {Fri, 07 Jul 2017 14:06:35 +0200},
  biburl = {http://dblp.uni-trier.de/rec/bib/conf/msr/2017},
  bibsource = {dblp computer science bibliography, http://dblp.org}
}

% The edition lives in its own field so the style can format it; the
% language name C is braced so sentence-casing styles keep it uppercase.
@Book{ applied-crypto,
  Title = {Applied cryptography: protocols, algorithms, and source code in {C}},
  Author = {Schneier, Bruce},
  Edition = {Second},
  Year = {2007},
  Publisher = {{J}ohn {W}iley \& {S}ons},
  ISBN = {9788126513680}
}

% Child of DBLP:conf/www/2004, which appears later in the file.
@InProceedings{ boldi-vigna-webgraph-1,
  Author = {Paolo Boldi and Sebastiano Vigna},
  Title = {The webgraph framework {I:} compression techniques},
  BookTitle = {Proceedings of the 13th international conference on World Wide Web, {WWW} 2004, New York, NY, USA, May 17-20, 2004},
  Pages = {595--602},
  Year = {2004},
  CrossRef = {DBLP:conf/www/2004},
  URL = {https://doi.org/10.1145/988672.988752},
  DOI = {10.1145/988672.988752},
  timestamp = {Tue, 06 Nov 2018 16:57:08 +0100},
  biburl = {https://dblp.org/rec/bib/conf/www/BoldiV04},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}

% The acronym BFS is braced to survive title recasing.
@Article{ apostolico2009graph,
  Title = {Graph compression by {BFS}},
  Author = {Apostolico, Alberto and Drovandi, Guido},
  Journal = {Algorithms},
  Volume = {2},
  Number = {3},
  Pages = {1031--1044},
  Year = {2009},
  Publisher = {Molecular Diversity Preservation International}
}

@Proceedings{ DBLP:conf/www/2004,
  Editor = {Stuart I. Feldman and Mike Uretsky and Marc Najork and Craig E.
Wills},
  Title = {Proceedings of the 13th international conference on World Wide Web, {WWW} 2004, New York, NY, USA, May 17-20, 2004},
  Publisher = {{ACM}},
  Year = {2004},
  ISBN = {1-58113-844-X},
  timestamp = {Fri, 10 Sep 2004 15:22:27 +0200},
  biburl = {https://dblp.org/rec/bib/conf/www/2004},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}

% Child of DBLP:conf/dcc/2004, which follows immediately.
@InProceedings{ boldi-vigna-webgraph-2,
  Author = {Paolo Boldi and Sebastiano Vigna},
  Title = {The WebGraph Framework {II:} Codes For The World-Wide Web},
  BookTitle = {2004 Data Compression Conference {(DCC} 2004), 23-25 March 2004, Snowbird, UT, {USA}},
  Pages = {528},
  Year = {2004},
  CrossRef = {DBLP:conf/dcc/2004},
  URL = {https://doi.org/10.1109/DCC.2004.1281504},
  DOI = {10.1109/DCC.2004.1281504},
  timestamp = {Tue, 23 May 2017 01:07:01 +0200},
  biburl = {https://dblp.org/rec/bib/conf/dcc/BoldiV04},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}

@Proceedings{ DBLP:conf/dcc/2004,
  Title = {2004 Data Compression Conference {(DCC} 2004), 23-25 March 2004, Snowbird, UT, {USA}},
  Publisher = {{IEEE} Computer Society},
  Year = {2004},
  URL = {http://ieeexplore.ieee.org/xpl/mostRecentIssue.jsp?punumber=9014},
  ISBN = {0-7695-2082-0},
  timestamp = {Tue, 12 May 2015 17:11:45 +0200},
  biburl = {https://dblp.org/rec/bib/conf/dcc/2004},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}

% Corporate author: the extra pair of braces makes BibTeX treat the whole
% phrase as one name instead of splitting it into first / von / last parts.
% The link is wrapped in \url like the other Misc entries of this file.
@Misc{ ParisCall2019,
  Author = {{Expert group meeting}},
  Title = {Paris Call: Software Source Code as Heritage for Sustainable Development},
  HowPublished = {Available from \url{https://unesdoc.unesco.org/ark:/48223/pf0000366715}},
  Year = 2019
}

@Misc{ VirtualAGC,
  Author = {Ronald Burkey},
  Title = {Virtual AGC - Changelog},
  HowPublished = {Available at \url{http://ibiblio.org/apollo/changes.html}},
  Note = {Spans years 2003 to 2019}
}

@Book{ Abelson:SIC85,
  Author = "Harold Abelson and Gerald J.
Sussman with Julie Sussman",
  Title = "Structure and Interpretation of Computer Programs",
  Publisher = pub-mit # " and " # pub-mcgraw-hill,
  Address = pub-mit:adr,
  Year = "1985",
  ISBN = "0-262-01077-1 (MIT Press), 0-07-000422-6 (McGraw-Hill)",
  isbn-13 = "978-0-262-01077-1 (MIT Press), 978-0-07-000422-1 (McGraw-Hill)",
  lccn = "QA76.6 .A255 1984"
}

% GitHub studies. The product name is braced in every title so that
% sentence-casing styles do not print it as "github" or "Github".
@InProceedings{ kalliamvakou2014promises,
  Title = {The promises and perils of mining {GitHub}},
  Author = {Kalliamvakou, Eirini and Gousios, Georgios and Blincoe, Kelly and Singer, Leif and German, Daniel M and Damian, Daniela},
  BookTitle = {Proceedings of the 11th working conference on mining software repositories},
  Pages = {92--101},
  Year = {2014},
  Organization = {ACM}
}

% Second author has a compound surname (Canovas Izquierdo), given in
% "Last, First" form so both words are kept as the family name.
@Article{ cosentino2017systematic,
  Title = {A systematic mapping study of software development with {GitHub}},
  Author = {Cosentino, Valerio and C{\'a}novas Izquierdo, Javier Luis and Cabot, Jordi},
  Journal = {IEEE Access},
  Volume = {5},
  Pages = {7173--7192},
  Year = {2017},
  Publisher = {IEEE}
}

@Article{ jiang2017whyfork,
  Title = {Why and how developers fork what from whom in {GitHub}},
  Author = {Jiang, Jing and Lo, David and He, Jiahuan and Xia, Xin and Kochhar, Pavneet Singh and Zhang, Li},
  Journal = {Empirical Software Engineering},
  Volume = {22},
  Number = {1},
  Pages = {547--578},
  Year = {2017},
  Publisher = {Springer}
}

@InProceedings{ lima2014ghsocial,
  Title = {Coding together at scale: {GitHub} as a collaborative social network},
  Author = {Lima, Antonio and Rossi, Luca and Musolesi, Mirco},
  BookTitle = {Eighth International AAAI Conference on Weblogs and Social Media},
  Year = {2014}
}

@InProceedings{ biazzini2014maythefork,
  Title = {May the fork be with you: novel metrics to analyze collaboration on {GitHub}},
  Author = {Biazzini, Marco and Baudry, Benoit},
  BookTitle = {Proceedings of the 5th International Workshop on Emerging Trends in Software Metrics},
  Pages = {37--43},
  Year = {2014},
  Organization = {ACM}
}

@InProceedings{ padhye2014extcontrib,
  Title = {A study of
external community contribution to open-source projects on {GitHub}},
  Author = {Padhye, Rohan and Mani, Senthil and Sinha, Vibha Singhal},
  BookTitle = {Proceedings of the 11th Working Conference on Mining Software Repositories},
  Pages = {332--335},
  Year = {2014},
  Organization = {ACM}
}

% GitHub is braced against title recasing; the accents of the second
% author's name are restored with LaTeX escapes.
@InProceedings{ thung2013network,
  Title = {Network structure of social coding in {GitHub}},
  Author = {Thung, Ferdian and Bissyand{\'e}, Tegawend{\'e} F and Lo, David and Jiang, Lingxiao},
  BookTitle = {2013 17th European Conference on Software Maintenance and Reengineering},
  Pages = {323--326},
  Year = {2013},
  Organization = {IEEE}
}

@InProceedings{ rastogi2016forking,
  Title = {Forking and the Sustainability of the Developer Community Participation--An Empirical Investigation on Outcomes and Reasons},
  Author = {Rastogi, Ayushi and Nagappan, Nachiappan},
  BookTitle = {2016 IEEE 23rd International Conference on Software Analysis, Evolution, and Reengineering (SANER)},
  Volume = {1},
  Pages = {102--111},
  Year = {2016},
  Organization = {IEEE}
}

@InProceedings{ stuanciulescu2015forked,
  Title = {Forked and integrated variants in an open-source firmware project},
  Author = {Stanciulescu, Stefan and Schulze, Sandro and Wasowski, Andrzej},
  BookTitle = {2015 IEEE International Conference on Software Maintenance and Evolution (ICSME)},
  Pages = {151--160},
  Year = {2015},
  Organization = {IEEE}
}

@Article{ kagdi2007msrsurvey,
  Title = {A survey and taxonomy of approaches for mining software repositories in the context of software evolution},
  Author = {Kagdi, Huzefa and Collard, Michael L.
and Maletic, Jonathan I.},
  Journal = {Journal of Software Maintenance and Evolution: Research and Practice},
  Volume = {19},
  Number = {2},
  Pages = {77--131},
  Year = {2007},
  Publisher = {Wiley Online Library}
}

% Technical report: Number holds the report identifier. The scraped
% Volume/Number/Pages triple (541 / 115 / 64--68) made styles print
% "Technical Report 115"; the page count now sits in Note.
% NOTE(review): report id 2007-541 and the 115-page length should be
% confirmed against the institution's report listing.
@TechReport{ roy2007clonedetectionsurvey,
  Title = {A survey on software clone detection research},
  Author = {Roy, Chanchal Kumar and Cordy, James R},
  Institution = {School of Computing, Queen's University},
  Number = {2007-541},
  Year = {2007},
  Note = {115 pages}
}

@Article{ rattan2013clonedetectionreview,
  Title = {Software clone detection: A systematic review},
  Author = {Rattan, Dhavleesh and Bhatia, Rajesh and Singh, Maninder},
  Journal = {Information and Software Technology},
  Volume = {55},
  Number = {7},
  Pages = {1165--1199},
  Year = {2013},
  Publisher = {Elsevier}
}

@InCollection{ mens2008swevolintro,
  Title = {Introduction and roadmap: History and challenges of software evolution},
  Author = {Mens, Tom},
  BookTitle = {Software evolution},
  Pages = {1--11},
  Year = {2008},
  Publisher = {Springer}
}

@InProceedings{ dabbish2012socialcoding,
  Title = {Social coding in {GitHub}: transparency and collaboration in an open software repository},
  Author = {Dabbish, Laura and Stuart, Colleen and Tsay, Jason and Herbsleb, Jim}, 
  BookTitle = {Proceedings of the ACM 2012 conference on computer supported cooperative work},
  Pages = {1277--1286},
  Year = {2012},
  Organization = {ACM}
}

% The particle "van" belongs in front of the surname ("von Last, First"),
% otherwise BibTeX files it under the given names.
@InProceedings{ gousios2014pullrequests,
  Title = {An exploratory study of the pull-based software development model},
  Author = {Gousios, Georgios and Pinzger, Martin and van Deursen, Arie},
  BookTitle = {Proceedings of the 36th International Conference on Software Engineering},
  Pages = {345--355},
  Year = {2014},
  Organization = {ACM}
}

@Article{ spinellis2005vcs,
  Title = {Version control systems},
  Author = {Spinellis, Diomidis},
  Journal = {IEEE Software},
  Volume = {22},
  Number = {5},
  Pages = {108--109},
  Year = {2005},
  Publisher = {IEEE}
}

@InProceedings{ ray2014large,
  Title = {A large scale study of programming
languages and code quality in {GitHub}},
  Author = {Ray, Baishakhi and Posnett, Daryl and Filkov, Vladimir and Devanbu, Premkumar},
  BookTitle = {Proceedings of the 22nd ACM SIGSOFT International Symposium on Foundations of Software Engineering},
  Pages = {155--165},
  Year = {2014},
  Organization = {ACM}
}

% Year corrected to match the volume: IEEE Software volume 32 is the 2015
% volume (this file's other IEEE Software entries give vol. 22 = 2005 and
% vol. 17 = 2000). The citation key is kept so existing \cite calls work.
@Article{ stol2014innersource,
  Title = {Inner source--adopting open source development practices in organizations: a tutorial},
  Author = {Stol, Klaas-Jan and Fitzgerald, Brian},
  Journal = {IEEE Software},
  Volume = {32},
  Number = {4},
  Pages = {60--67},
  Year = {2015},
  Publisher = {IEEE}
}

@Article{ capraro2017innersource,
  Title = {Inner source definition, benefits, and challenges},
  Author = {Capraro, Maximilian and Riehle, Dirk},
  Journal = {ACM Computing Surveys (CSUR)},
  Volume = {49},
  Number = {4},
  Pages = {67},
  Year = {2017},
  Publisher = {ACM}
}

% First of two children of DBLP:conf/oss/2012 (see below).
@InProceedings{ robles2012forks,
  Author = {Gregorio Robles and Jes{\'{u}}s M. Gonz{\'{a}}lez{-}Barahona},
  Title = {A Comprehensive Study of Software Forks: Dates, Reasons and Outcomes},
  BookTitle = {Open Source Systems: Long-Term Sustainability - 8th {IFIP} {WG} 2.13 International Conference, {OSS} 2012, Hammamet, Tunisia, September 10-13, 2012. Proceedings},
  Pages = {1--14},
  Year = {2012},
  CrossRef = {DBLP:conf/oss/2012},
  URL = {https://doi.org/10.1007/978-3-642-33442-9\_1},
  DOI = {10.1007/978-3-642-33442-9\_1},
  timestamp = {Wed, 25 Sep 2019 18:23:01 +0200},
  biburl = {https://dblp.org/rec/bib/conf/oss/RoblesG12},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}

% Journal name spelled out instead of the DBLP abbreviation IJOSSP.
@Article{ nyman2011-fork-or-not,
  Author = {Linus Nyman and Tommi Mikkonen},
  Title = {To Fork or Not to Fork: Fork Motivations in SourceForge Projects},
  Journal = {International Journal of Open Source Software and Processes},
  Volume = {3},
  Number = {3},
  Pages = {1--9},
  Year = {2011},
  URL = {https://doi.org/10.4018/jossp.2011070101},
  DOI = {10.4018/jossp.2011070101},
  timestamp = {Sat, 20 May 2017 00:23:28 +0200},
  biburl = {https://dblp.org/rec/bib/journals/ijossp/NymanM11},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}

% Second child of DBLP:conf/oss/2012.
@InProceedings{ nyman2012forking-sustainability,
  Author = {Linus Nyman and Tommi Mikkonen and Juho Lindman and Martin Foug{\`{e}}re},
  Title = {Perspectives on Code Forking and Sustainability in Open Source Software},
  BookTitle = {Open Source Systems: Long-Term Sustainability - 8th {IFIP} {WG} 2.13 International Conference, {OSS} 2012, Hammamet, Tunisia, September 10-13, 2012. Proceedings},
  Pages = {274--279},
  Year = {2012},
  CrossRef = {DBLP:conf/oss/2012},
  URL = {https://doi.org/10.1007/978-3-642-33442-9\_21},
  DOI = {10.1007/978-3-642-33442-9\_21},
  timestamp = {Tue, 26 Jun 2018 14:13:50 +0200},
  biburl = {https://dblp.org/rec/bib/conf/oss/NymanMLF12},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}

% Crossref parent of robles2012forks and nyman2012forking-sustainability.
% Classic BibTeX requires a cross-referenced entry to come AFTER every entry
% that refers to it, so this record sits below both children.
@Proceedings{ DBLP:conf/oss/2012,
  Editor = {Imed Hammouda and Bj{\"{o}}rn Lundell and Tommi Mikkonen and Walt Scacchi},
  Title = {Open Source Systems: Long-Term Sustainability - 8th {IFIP} {WG} 2.13 International Conference, {OSS} 2012, Hammamet, Tunisia, September 10-13, 2012. Proceedings},
  Series = {{IFIP} Advances in Information and Communication Technology},
  Volume = {378},
  Publisher = {Springer},
  Year = {2012},
  URL = {https://doi.org/10.1007/978-3-642-33442-9},
  DOI = {10.1007/978-3-642-33442-9},
  ISBN = {978-3-642-33441-2},
  timestamp = {Thu, 25 May 2017 00:41:57 +0200},
  biburl = {https://dblp.org/rec/bib/conf/oss/2012},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}

% Child of DBLP:conf/wikis/2014, which follows.
@InProceedings{ nyman2014forking-hackers,
  Author = {Linus Nyman},
  Title = {Hackers on Forking},
  BookTitle = {Proceedings of The International Symposium on Open Collaboration, OpenSym 2014, Berlin, Germany, August 27 - 29, 2014},
  Pages = {6:1--6:10},
  Year = {2014},
  CrossRef = {DBLP:conf/wikis/2014},
  URL = {https://doi.org/10.1145/2641580.2641590},
  DOI = {10.1145/2641580.2641590},
  timestamp = {Tue, 06 Nov 2018 16:58:30 +0100},
  biburl = {https://dblp.org/rec/bib/conf/wikis/Nyman14},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}

@Proceedings{ DBLP:conf/wikis/2014,
  Editor = {Dirk Riehle and Jes{\'{u}}s M. Gonz{\'{a}}lez{-}Barahona and Gregorio Robles and Kathrin M. M{\"{o}}slein and Ina Schieferdecker and Ulrike Cress and Astrid Wichmann and Brent J.
Hecht and Nicolas Jullien},
  Title = {Proceedings of The International Symposium on Open Collaboration, OpenSym 2014, Berlin, Germany, August 27 - 29, 2014},
  Publisher = {{ACM}},
  Year = {2014},
  URL = {http://dl.acm.org/citation.cfm?id=2641580},
  ISBN = {978-1-4503-3016-9},
  timestamp = {Thu, 01 Jun 2017 18:58:27 +0200},
  biburl = {https://dblp.org/rec/bib/conf/wikis/2014},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}

% DBLP journal records.
@Article{ nyman2016forkhistory,
  Author = {Linus Nyman and Mikael Laakso},
  Title = {Notes on the History of Fork and Join},
  Journal = {{IEEE} Annals of the History of Computing},
  Volume = {38},
  Number = {3},
  Pages = {84--87},
  Year = {2016},
  URL = {https://doi.org/10.1109/MAHC.2016.34},
  DOI = {10.1109/MAHC.2016.34},
  timestamp = {Wed, 14 Nov 2018 10:41:58 +0100},
  biburl = {https://dblp.org/rec/bib/journals/annals/NymanL16},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}

@Article{ alexandru2019redundancy,
  Author = {Carol V. Alexandru and Sebastiano Panichella and Sebastian Proksch and Harald C. Gall},
  Title = {Redundancy-free analysis of multi-revision software artifacts},
  Journal = {Empirical Software Engineering},
  Volume = {24},
  Number = {1},
  Pages = {332--380},
  Year = {2019},
  URL = {https://doi.org/10.1007/s10664-018-9630-9},
  DOI = {10.1007/s10664-018-9630-9},
  timestamp = {Wed, 27 Feb 2019 18:48:53 +0100},
  biburl = {https://dblp.org/rec/bib/journals/ese/AlexandruPPG19},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}

@InProceedings{ alexandru2017redundancy,
  Author = {Carol V. Alexandru and Sebastiano Panichella and Harald C.
Gall},
  Title = {Reducing redundancies in multi-revision code analysis},
  BookTitle = {{IEEE} 24th International Conference on Software Analysis, Evolution and Reengineering, {SANER} 2017, Klagenfurt, Austria, February 20-24, 2017},
  Pages = {148--159},
  Year = {2017},
  CrossRef = {DBLP:conf/wcre/2017},
  URL = {https://doi.org/10.1109/SANER.2017.7884617},
  DOI = {10.1109/SANER.2017.7884617},
  timestamp = {Wed, 16 Oct 2019 14:14:53 +0200},
  biburl = {https://dblp.org/rec/bib/conf/wcre/AlexandruPG17},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}

% Crossref parent of alexandru2017redundancy (placed after its child).
@Proceedings{ DBLP:conf/wcre/2017,
  Editor = {Martin Pinzger and Gabriele Bavota and Andrian Marcus},
  Title = {{IEEE} 24th International Conference on Software Analysis, Evolution and Reengineering, {SANER} 2017, Klagenfurt, Austria, February 20-24, 2017},
  Publisher = {{IEEE} Computer Society},
  Year = {2017},
  URL = {https://ieeexplore.ieee.org/xpl/conhome/7879528/proceeding},
  ISBN = {978-1-5090-5501-2},
  timestamp = {Wed, 16 Oct 2019 14:14:53 +0200},
  biburl = {https://dblp.org/rec/bib/conf/wcre/2017},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}

@Article{ allamanis2018bigcodesurvey,
  Title = {A survey of machine learning for big code and naturalness},
  Author = {Allamanis, Miltiadis and Barr, Earl T and Devanbu, Premkumar and Sutton, Charles},
  Journal = {ACM Computing Surveys (CSUR)},
  Volume = {51},
  Number = {4},
  Pages = {81},
  Year = {2018},
  Publisher = {ACM}
}

% First author's accents restored so the name matches the spelling used
% for the same person in the DBLP-sourced entries of this file.
@Article{ gonzalez2009macro,
  Title = {Macro-level software evolution: a case study of a large software compilation},
  Author = {Gonz{\'a}lez-Barahona, Jes{\'u}s M and Robles, Gregorio and Michlmayr, Martin and Amor, Juan Jos{\'e} and German, Daniel M},
  Journal = {Empirical Software Engineering},
  Volume = {14},
  Number = {3},
  Pages = {262--285},
  Year = {2009},
  Publisher = {Springer}
}

@InProceedings{ SvajlenkoR17,
  Author = {Jeffrey Svajlenko and Chanchal Kumar Roy},
  Title = {Fast and flexible large-scale clone detection with CloneWorks},
BookTitle = {Proceedings of the 39th International Conference on Software Engineering, {ICSE} 2017, Buenos Aires, Argentina, May 20-28, 2017 - Companion Volume},
  Pages = {27--30},
  Year = {2017},
  URL = {https://doi.org/10.1109/ICSE-C.2017.3},
  DOI = {10.1109/ICSE-C.2017.3},
  timestamp = {Fri, 07 Jul 2017 14:06:36 +0200},
  biburl = {https://dblp.org/rec/bib/conf/icse/SvajlenkoR17},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}

% Code clone literature (DBLP records).
@InProceedings{ SemuraYCI17,
  Author = {Yuichi Semura and Norihiro Yoshida and Eunjong Choi and Katsuro Inoue},
  Title = {CCFinderSW: Clone Detection Tool with Flexible Multilingual Tokenization},
  BookTitle = {24th Asia-Pacific Software Engineering Conference, {APSEC} 2017, Nanjing, China, December 4-8, 2017},
  Pages = {654--659},
  Year = {2017},
  URL = {https://doi.org/10.1109/APSEC.2017.80},
  DOI = {10.1109/APSEC.2017.80},
  timestamp = {Wed, 28 Mar 2018 12:42:10 +0200},
  biburl = {https://dblp.org/rec/bib/conf/apsec/SemuraYCI17},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}

@Article{ ThummalapentaCAP10,
  Author = {Suresh Thummalapenta and Luigi Cerulo and Lerina Aversano and Massimiliano Di Penta},
  Title = {An empirical study on the maintenance of source code clones},
  Journal = {Empirical Software Engineering},
  Volume = {15},
  Number = {1},
  Pages = {1--34},
  Year = {2010},
  URL = {https://doi.org/10.1007/s10664-009-9108-x},
  DOI = {10.1007/s10664-009-9108-x},
  timestamp = {Tue, 06 Jun 2017 22:24:59 +0200},
  biburl = {https://dblp.org/rec/bib/journals/ese/ThummalapentaCAP10},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}

@InProceedings{ zhou2019fork,
  Title = {What the fork: a study of inefficient and efficient forking practices in social coding},
  Author = {Zhou, Shurui and Vasilescu, Bogdan and K{\"a}stner, Christian},
  BookTitle = {Proceedings of the 2019 27th ACM Joint Meeting on European Software Engineering Conference and Symposium on the Foundations of Software Engineering},
  Pages =
{350--361},
  Year = {2019},
  Organization = {ACM}
}

% Provenance-tracking work: technical report and journal article.
@TechReport{ swh-provenance-tr,
  Author = {Guillaume Rousseau and Di Cosmo, Roberto and Stefano Zacchiroli},
  Title = {Growth and Duplication of Public Source Code over Time: Provenance Tracking at Scale},
  Year = {2019},
  Institution = {Inria},
  Note = {\url{https://hal.archives-ouvertes.fr/hal-02158292}}
}

@Article{ swh-provenance-emse,
  Author = {Guillaume Rousseau and Di Cosmo, Roberto and Stefano Zacchiroli},
  Title = {Software Provenance Tracking at the Scale of Public Source Code},
  Publisher = {Springer},
  Year = {2020},
  ISSN = {1382-3256},
  Journal = {Empirical Software Engineering},
  Volume = {25},
  Number = {4},
  Pages = {2930--2959},
  URL = {https://doi.org/10.1007/s10664-020-09828-5},
  DOI = {10.1007/s10664-020-09828-5},
  timestamp = {Thu, 18 Jun 2020 08:15:38 +0200},
  biburl = {https://dblp.org/rec/journals/ese/RousseauCZ20.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}

% Corporate author, double-braced so it is kept as a single name.
@Misc{ linguist,
  Title = {Linguist: Language Savant},
  Author = {{GitHub, Inc.}},
  HowPublished = {\url{https://github.com/github/linguist}},
  Year = {2011},
  Note = {retrieved 2020-01-06}
}

@Misc{ linguist-accuracy,
  Title = {C\# or Java? TypeScript or JavaScript?
Machine learning based classification of programming languages},
  Author = {Ganesan, Kavita and Foti, Romano},
  Year = {2019},
  HowPublished = {GitHub, Inc.~blog post: \url{https://github.blog/2019-07-02-c-or-java-typescript-or-javascript-machine-learning-based-classification-of-programming-languages/}},
  Note = {retrieved 2020-01-06}
}

% Programming language identification and source code classification.
@InProceedings{ van2016software,
  Title = {Software language identification with natural language classifiers},
  Author = {van Dam, Juriaan Kennedy and Zaytsev, Vadim},
  BookTitle = {2016 IEEE 23rd International Conference on Software Analysis, Evolution, and Reengineering (SANER)},
  Volume = {1},
  Pages = {624--628},
  Year = {2016},
  Publisher = {IEEE}
}

@TechReport{ klein2011algorithmic,
  Title = {Algorithmic programming language identification},
  Author = {Klein, David and Murray, Kyle and Weber, Simon},
  Institution = {arXiv},
  Number = {1106.4064},
  Year = {2011},
  URL = {https://arxiv.org/abs/1106.4064}
}

@InProceedings{ ugurel2002classification,
  Title = {What's the code?: automatic classification of source code archives},
  Author = {Ugurel, Secil and Krovetz, Robert and Giles, C Lee},
  BookTitle = {Proceedings of the eighth ACM SIGKDD international conference on Knowledge discovery and data mining},
  Pages = {632--638},
  Year = {2002},
  Organization = {ACM}
}

@InProceedings{ reyes2016automatic,
  Title = {Automatic Classification of Source Code Archives by Programming Language: A Deep Learning Approach},
  Author = {Reyes, Julio and Ram{\'\i}rez, Diego and Paciello, Julio},
  BookTitle = {2016 International Conference on Computational Science and Computational Intelligence (CSCI)},
  Pages = {514--519},
  Year = {2016},
  Organization = {IEEE}
}

@InProceedings{ gilda2017source,
  Title = {Source code classification using Neural Networks},
  Author = {Gilda, Shlok},
  BookTitle = {2017 14th International Joint Conference on Computer Science and Software Engineering (JCSSE)},
  Pages = {1--6},
  Year = {2017},
  Publisher = {IEEE}
}

@Book{ wexelblat1981hopl-i,
  Title =
{History of programming languages},
  Author = {Wexelblat, Richard L},
  Year = {1981},
  Publisher = {Academic Press}
}

% Name suffixes use the three-part form "Last, Jr., First"; written as
% "Bergin Jr, Thomas J" the suffix became part of the surname.
@Book{ bergin1996hopl-ii,
  Title = {History of programming languages---II},
  Author = {Bergin, Jr., Thomas J and Gibson, Jr., Richard G},
  Year = {1996},
  Publisher = {ACM}
}

@Proceedings{ ryder2007hopl-iii,
  Editor = {Barbara G. Ryder and Brent Hailpern},
  Title = {Proceedings of the Third {ACM} {SIGPLAN} History of Programming Languages Conference (HOPL-III), San Diego, California, USA, 9-10 June 2007},
  Publisher = {{ACM}},
  Year = {2007},
  URL = {http://dl.acm.org/citation.cfm?id=1238844},
  timestamp = {Tue, 22 May 2012 15:24:55 +0200},
  biburl = {https://dblp.org/rec/bib/conf/hopl/2007},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}

@Article{ sammet1972langhist,
  Title = {Programming languages: history and future},
  Author = {Sammet, Jean E},
  Journal = {Communications of the ACM},
  Volume = {15},
  Number = {7},
  Pages = {601--610},
  Year = {1972},
  Publisher = {ACM}
}

@Misc{ librariesio2018,
  Author = {Jeremy Katz},
  Title = {{Libraries.io} Open Source Repository and Dependency Metadata},
  Month = dec,
  Year = 2018,
  Publisher = {Zenodo},
  Version = {1.4.0},
  DOI = {10.5281/zenodo.2536573},
  URL = {https://doi.org/10.5281/zenodo.2536573}
}

% The acronym VCS is braced so that title recasing keeps it uppercase.
@InProceedings{ mockus2019woc,
  Title = {World of code: an infrastructure for mining the universe of open source {VCS} data},
  Author = {Ma, Yuxing and Bogart, Chris and Amreen, Sadika and Zaretzki, Russell and Mockus, Audris},
  BookTitle = {Proceedings of the 16th International Conference on Mining Software Repositories},
  Pages = {143--154},
  Year = {2019},
  Organization = {IEEE Press}
}

@InProceedings{ dyer2013boa,
  Title = {Boa: A language and infrastructure for analyzing ultra-large-scale software repositories},
  Author = {Dyer, Robert and Nguyen, Hoan Anh and Rajan, Hridesh and Nguyen, Tien N},
  BookTitle = {Proceedings of the 2013 International Conference on Software Engineering},
  Pages = {422--431},
  Year = {2013},
Organization = {IEEE Press}
}

% Journal names are printed as stored, so they are given in their proper
% title case rather than the scraped sentence case.
@Article{ barabasi2003scalefree,
  Title = {Scale-free networks},
  Author = {Barab{\'a}si, Albert-L{\'a}szl{\'o} and Bonabeau, Eric},
  Journal = {Scientific American},
  Volume = {288},
  Number = {5},
  Pages = {60--69},
  Year = {2003},
  Publisher = {JSTOR}
}

@Article{ barabasi2002networkstats,
  Title = {Statistical mechanics of complex networks},
  Author = {Albert, R{\'e}ka and Barab{\'a}si, Albert-L{\'a}szl{\'o}},
  Journal = {Reviews of Modern Physics},
  Volume = {74},
  Number = {1},
  Pages = {47},
  Year = {2002},
  Publisher = {APS}
}

% Em dashes in the abstract are written as --- so the file stays ASCII,
% consistent with the LaTeX accent escapes used throughout.
@InProceedings{ saner-2020-swh-graph,
  Author = {Paolo Boldi and Antoine Pietri and Sebastiano Vigna and Stefano Zacchiroli},
  Title = {Ultra-Large-Scale Repository Analysis via Graph Compression},
  Abstract = {We consider the problem of mining the development history---as captured by modern version control systems---of ultra-large-scale software archives (e.g., tens of millions software repositories corresponding). We show that graph compression techniques can be applied to the problem, dramatically reducing the hardware resources needed to mine similarly-sized corpus. As a concrete use case we compress the full Software Heritage archive, consisting of 5 billion unique source code files and 1 billion unique commits, harvested from more than 80 million software projects---encompassing a full mirror of GitHub. The resulting compressed graph fits in less than 100 GB of RAM, corresponding to a hardware cost of less than 300 U.S. dollars. We show that the compressed in-memory representation of the full corpus can be accessed with excellent performances, with edge lookup times close to memory random access.
As a sample exploitation experiment we show that the compressed graph can be used to conduct clone detection at this scale, benefiting from main memory access speed.},
  Publisher = {IEEE},
  Year = {2020},
  BookTitle = {SANER 2020: The 27th IEEE International Conference on Software Analysis, Evolution and Reengineering}
}

% Abstract cleaned of a line-break hyphenation artifact in "identifiers".
@Article{ cise-2020-doi,
  Author = {Di Cosmo, Roberto and Gruenpeter, Morane and Stefano Zacchiroli},
  Title = {Referencing Source Code Artifacts: a Separate Concern in Software Citation},
  Abstract = {Among the entities involved in software citation, software source code requires special attention, due to the role it plays in ensuring scientific reproducibility. To reference source code we need identifiers that are not only unique and persistent, but also support integrity checking intrinsically. Suitable identifiers must guarantee that denoted objects will always stay the same, without relying on external third parties and administrative processes. We analyze the role of identifiers for digital objects (IDOs), whose properties are different from, and complementary to, those of the various digital identifiers of objects (DIOs) that are today popular building blocks of software and data citation toolchains.
We argue that both kinds of identifiers are needed and detail the syntax, semantics, and practical implementation of the persistent identifiers (PIDs) adopted by the Software Heritage project to reference billions of software source code artifacts such as source code files, directories, and commits.},
  Publisher = {IEEE},
  Year = {2020},
  ISSN = {1521-9615},
  Month = mar,
  Volume = 22,
  Number = 2,
  Pages = {33--43},
  DOI = {10.1109/MCSE.2019.2963148},
  Journal = {Computing in Science and Engineering}
}

% Library names are braced so title recasing keeps their capitalisation.
@Book{ mckinney2012scipy,
  Title = {Python for data analysis: Data wrangling with {Pandas}, {NumPy}, and {IPython}},
  Author = {McKinney, Wes},
  Year = {2012},
  Publisher = {O'Reilly Media, Inc.}
}

@Book{ oliphant2006numpy,
  Title = {A guide to {NumPy}},
  Author = {Oliphant, Travis E},
  Volume = {1},
  Year = {2006},
  Publisher = {Trelgol Publishing USA}
}

@Article{ hopcroft1973graphalgos,
  Title = {Algorithm 447: efficient algorithms for graph manipulation},
  Author = {Hopcroft, John and Tarjan, Robert},
  Journal = {Communications of the ACM},
  Volume = {16},
  Number = {6},
  Pages = {372--378},
  Year = {1973},
  Publisher = {ACM}
}

% Initials are separated ("E. J.") so BibTeX sees two of them, and the
% journal name is given in title case.
@Article{ clauset2009powerlaw,
  Title = {Power-law distributions in empirical data},
  Author = {Clauset, Aaron and Shalizi, Cosma Rohilla and Newman, Mark E. J.},
  Journal = {SIAM Review},
  Volume = {51},
  Number = {4},
  Pages = {661--703},
  Year = {2009},
  Publisher = {SIAM}
}

@Book{ fogel2005producingoss,
  Title = {Producing open source software: How to run a successful free software project},
  Author = {Fogel, Karl},
  Year = {2005},
  Publisher = {O'Reilly Media, Inc.}
}

% Page range uses the double hyphen (en dash) expected by BibTeX styles.
@Article{ 2020GtCitation,
  Author = {Pierre {Alliez} and Di Cosmo, Roberto and Benjamin {Guedj} and Alain {Girault} and Mohand-Said {Hacid} and Arnaud {Legrand} and Nicolas {Rougier}},
  Journal = {Computing in Science and Engineering},
  Title = {Attributing and Referencing (Research) Software: Best Practices and Outlook From Inria},
  Year = {2020},
  Volume = {22},
  Number = {1},
  Pages = {39--52},
  Abstract = {Software is a
fundamental pillar of modern scientific research, across all fields and disciplines. However, there is a lack of adequate means to cite and reference software due to the complexity of the problem in terms of authorship, roles, and credits. This complexity is further increased when it is considered over the lifetime of a software that can span up to several decades. Building upon the internal experience of Inria, the French research institute for digital sciences, we provide in this article a contribution to the ongoing efforts in order to develop proper guidelines and recommendations for software citation and reference. Namely, we recommend: first, a richer taxonomy for software contributions with a qualitative scale; second, to put humans at the heart of the evaluation; and third, to distinguish citation from reference.},
  Keywords = {citation analysis;software engineering;French research institute;digital sciences;software citation;Inria;reference software;attributing software;Software packages;Complexity theory;Research and development;Libraries;Metadata;Best practices;Guidelines;Software citation;software reference;authorship;development process},
  DOI = {10.1109/MCSE.2019.2949413},
  ISSN = {1558-366X},
  Month = jan,
  Note = {Available from \url{https://hal.archives-ouvertes.fr/hal-02135891}}
}

% Software development productivity studies.
@Article{ maxwell1996productivity,
  Title = {Software development productivity of European space, military, and industrial applications},
  Author = {Maxwell, Katrina D and Van Wassenhove, Luk and Dutta, Soumitra},
  Journal = {IEEE Transactions on Software Engineering},
  Volume = {22},
  Number = {10},
  Pages = {706--718},
  Year = {1996},
  Publisher = {IEEE}
}

@Article{ maxwell2000productivity,
  Title = {Benchmarking software development productivity},
  Author = {Maxwell, Katrina D and Forselius, Pekka},
  Journal = {IEEE Software},
  Volume = {17},
  Number = {1},
  Pages = {80--88},
  Year = {2000},
  Publisher = {IEEE}
}

@Article{ rodriguez2012productivity,
  Title = {Empirical findings on team
size and productivity in software development},
  Author = {Rodr{\'\i}guez, Daniel and Sicilia, M. A. and Garc{\'\i}a, E and Harrison, Rachel},
  Journal = {Journal of Systems and Software},
  Volume = {85},
  Number = {3},
  Pages = {562--570},
  Year = {2012},
  Publisher = {Elsevier}
}

@Article{ rabai2011swetrends,
  Title = {A quantitative model for software engineering trends},
  Author = {Rabai, Latifa Ben Arfa and Bai, Yan Zhi and Mili, Ali},
  Journal = {Information Sciences},
  Volume = {181},
  Number = {22},
  Pages = {4993--5009},
  Year = {2011},
  Publisher = {Elsevier}
}

@Proceedings{ DBLP:conf/ecoop/2014ple,
  Editor = {Raoul{-}Gabriel Urma and Dominic A. Orchard and Alan Mycroft},
  Title = {Proceedings of the 1st Workshop on Programming Language Evolution, PLE@ECOOP 2014, Uppsala, Sweden, July 28, 2014},
  Publisher = {{ACM}},
  Year = {2014}
}

@InProceedings{ mciver2000proglangnovices,
  Author = {McIver, Linda},
  Title = {The effect of programming language on error rates of novice programmers},
  BookTitle = {Proceedings of the 12th Annual Workshop of the Psychology of Programming Interest Group, {PPIG} 2000, Cosenza, Italy, April 10-13, 2000},
  Pages = {15},
  Year = {2000}
}

@InProceedings{ kochhar2016codequality,
  Title = {A large scale study of multiple programming languages and code quality},
  Author = {Kochhar, Pavneet Singh and Wijedasa, Dinusha and Lo, David},
  BookTitle = {2016 IEEE 23rd International Conference on Software Analysis, Evolution, and Reengineering (SANER)},
  Volume = {1},
  Pages = {563--573},
  Year = {2016},
  Organization = {IEEE}
}

% GitHub is braced so sentence-casing styles do not lowercase it.
@Article{ ray2017codequality,
  Title = {A large-scale study of programming languages and code quality in {GitHub}},
  Author = {Ray, Baishakhi and Posnett, Daryl and Devanbu, Premkumar and Filkov, Vladimir},
  Journal = {Communications of the ACM},
  Volume = {60},
  Number = {10},
  Pages = {91--100},
  Year = {2017},
  Publisher = {ACM}
}

@Article{ vitek2019codequality,
  Title = {On the impact of programming languages on code quality: a reproduction study},
  Author
= {Berger, Emery D and Hollenbeck, Celeste and Maj, Petr and Vitek, Olga and Vitek, Jan},
  Journal = {ACM Transactions on Programming Languages and Systems (TOPLAS)},
  Volume = {41},
  Number = {4},
  Pages = {1--24},
  Year = {2019},
  Publisher = {ACM},
  Address = {New York, NY, USA}
}

% In the entry above, the publisher's city lives in Address instead of being fused into Publisher.
% In the entries below, title words whose capitalisation must survive sentence-casing are brace-protected.
@InProceedings{ rougier2018rescience,
  Author = {Nicolas P. Rougier and Konrad Hinsen},
  Title = {{ReScience} {C:} {A} Journal for Reproducible Replications in Computational Science},
  BookTitle = {Reproducible Research in Pattern Recognition - Second International Workshop, {RRPR} 2018, Beijing, China},
  Pages = {150--156},
  Year = {2019},
  Series = {Lecture Notes in Computer Science},
  Volume = {11455},
  Publisher = {Springer},
  URL = {https://doi.org/10.1007/978-3-030-23987-9\_14},
  DOI = {10.1007/978-3-030-23987-9\_14},
  timestamp = {Fri, 05 Jul 2019 09:41:16 +0200},
  biburl = {https://dblp.org/rec/bib/conf/rrpr/RougierH18},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}

@Article{ vitek2017dejavu,
  Author = {Cristina V. Lopes and Petr Maj and Pedro Martins and Vaibhav Saini and Di Yang and Jakub Zitny and Hitesh Sajnani and Jan Vitek},
  Title = {{D{\'{e}}j{\`{a}}Vu}: a map of code duplicates on {GitHub}},
  Journal = {{PACMPL}},
  Volume = {1},
  Number = {{OOPSLA}},
  Pages = {84:1--84:28},
  Year = {2017},
  URL = {https://doi.org/10.1145/3133908},
  DOI = {10.1145/3133908},
  timestamp = {Tue, 06 Nov 2018 12:51:05 +0100},
  biburl = {https://dblp.org/rec/bib/journals/pacmpl/LopesMMSYZSV17},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}

@Article{ AttardiF96,
  Author = {Giuseppe Attardi and Tito Flagella},
  Title = {Memory Management in the {PoSSo} Solver},
  Journal = {J. Symb.
Comput.},
  Volume = {21},
  Number = {3},
  Pages = {293--311},
  Year = {1996},
  URL = {https://doi.org/10.1006/jsco.1996.0013},
  DOI = {10.1006/jsco.1996.0013},
  timestamp = {Tue, 06 Jun 2017 22:26:33 +0200},
  biburl = {https://dblp.org/rec/bib/journals/jsc/AttardiF96},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}

% Corporate author: the double braces make BibTeX treat the organisation name as one indivisible unit.
@Misc{ Spectrum50,
  Author = {{Collections Trust}},
  Title = {Introduction to {Spectrum} 5.0},
  HowPublished = {\url{https://collectionstrust.org.uk/spectrum/spectrum-5/}},
  Annote = {Accessed 24 Sep 2019}
}

@Article{ GignoniGadducciCEP2019,
  Author = {Giovanni A. Cignoni and Fabio Gadducci},
  Title = {Retracing and assessing the {CEP} project},
  Journal = {CoRR},
  Volume = {abs/1904.00944},
  Year = {2019},
  URL = {http://arxiv.org/abs/1904.00944},
  ArchivePrefix = {arXiv},
  EPrint = {1904.00944},
  timestamp = {Wed, 24 Apr 2019 12:21:25 +0200},
  biburl = {https://dblp.org/rec/journals/corr/abs-1904-00944.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}

% The page range uses the ASCII double-hyphen form rather than a raw Unicode en dash.
@Article{ Buchberger1976,
  Author = {Buchberger, B.},
  Title = {A Theoretical Basis for the Reduction of Polynomials to Canonical Forms},
  Year = {1976},
  issue_date = {August 1976},
  Publisher = {Association for Computing Machinery},
  Address = {New York, NY, USA},
  Volume = {10},
  Number = {3},
  ISSN = {0163-5824},
  URL = {https://doi.org/10.1145/1088216.1088219},
  DOI = {10.1145/1088216.1088219},
  Journal = {SIGSAM Bull.},
  Month = aug,
  Pages = {19--29},
  numpages = {11}
}

@Article{ prana2019readme,
  Author = {Gede Artha Azriadi Prana and Christoph Treude and Ferdian Thung and Thushari Atapattu and David Lo},
  Title = {Categorizing the Content of {GitHub} {README} Files},
  Journal = {Empirical Software Engineering},
  Volume = {24},
  Number = {3},
  Pages = {1296--1327},
  Year = {2019},
  URL = {https://doi.org/10.1007/s10664-018-9660-3},
  DOI = {10.1007/s10664-018-9660-3},
  timestamp = {Wed, 25 Sep 2019 17:57:13 +0200},
  biburl = {https://dblp.org/rec/journals/ese/PranaTTAL19.bib},
  bibsource = {dblp computer
science bibliography, https://dblp.org}
}

@Article{ ducasse2007identifiers,
  Author = {Adrian Kuhn and St{\'{e}}phane Ducasse and Tudor G{\^{\i}}rba},
  Title = {Semantic clustering: Identifying topics in source code},
  Journal = {Inf. Softw. Technol.},
  Volume = {49},
  Number = {3},
  Pages = {230--243},
  Year = {2007},
  URL = {https://doi.org/10.1016/j.infsof.2006.10.017},
  DOI = {10.1016/j.infsof.2006.10.017},
  timestamp = {Thu, 20 Feb 2020 13:20:31 +0100},
  biburl = {https://dblp.org/rec/journals/infsof/KuhnDG07.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}

% Child entry: its CrossRef points at the proceedings entry that follows it.
@InProceedings{ bavota2016technicaldebt,
  Author = {Gabriele Bavota and Barbara Russo},
  Title = {A large-scale empirical study on self-admitted technical debt},
  BookTitle = {Proceedings of the 13th International Conference on Mining Software Repositories, {MSR} 2016, Austin, TX, USA, May 14-22, 2016},
  Pages = {315--326},
  Year = {2016},
  CrossRef = {DBLP:conf/msr/2016},
  URL = {https://doi.org/10.1145/2901739.2901742},
  DOI = {10.1145/2901739.2901742},
  timestamp = {Tue, 06 Nov 2018 16:57:14 +0100},
  biburl = {https://dblp.org/rec/conf/msr/BavotaR16.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}

@Proceedings{ DBLP:conf/msr/2016,
  Editor = {Miryung Kim and Romain Robbes and Christian Bird},
  Title = {Proceedings of the 13th International Conference on Mining Software Repositories, {MSR} 2016, Austin, TX, USA, May 14-22, 2016},
  Publisher = {{ACM}},
  Year = {2016},
  URL = {https://doi.org/10.1145/2901739},
  DOI = {10.1145/2901739},
  ISBN = {978-1-4503-4186-8},
  timestamp = {Tue, 06 Nov 2018 16:57:14 +0100},
  biburl = {https://dblp.org/rec/conf/msr/2016.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}

@InProceedings{ khandelwal2017zipg,
  Title = {{ZipG}: A memory-efficient graph store for interactive queries},
  Author = {Khandelwal, Anurag and Yang, Zongheng and Ye, Evan and Agarwal, Rachit and Stoica, Ion},
  BookTitle = {Proceedings of the 2017 ACM
International Conference on Management of Data},
  Pages = {1149--1164},
  Year = {2017}
}

% Product and query-language names are brace-protected against sentence-casing.
@InProceedings{ holzschuher2013queryperf,
  Title = {Performance of graph query languages: comparison of {Cypher}, {Gremlin} and native access in {Neo4j}},
  Author = {Holzschuher, Florian and Peinl, Ren{\'e}},
  BookTitle = {Proceedings of the Joint EDBT/ICDT 2013 Workshops},
  Pages = {195--204},
  Year = {2013}
}

@InProceedings{ miller2013neo4j,
  Title = {Graph database applications and concepts with {Neo4j}},
  Author = {Miller, Justin J.},
  BookTitle = {Proceedings of the Southern Association for Information Systems Conference},
  Year = {2013}
}

@Article{ vigna2015webstruct,
  Title = {The graph structure in the web--analyzed on different aggregation levels},
  Author = {Meusel, Robert and Vigna, Sebastiano and Lehmberg, Oliver and Bizer, Christian},
  Journal = {The Journal of Web Science},
  Volume = {1},
  Year = {2015}
}

% Full page range given (the article runs 440--442); initials carry their periods.
@Article{ watts1998smallworld,
  Title = {Collective dynamics of ``small-world'' networks},
  Author = {Watts, Duncan J. and Strogatz, Steven H.},
  Journal = {Nature},
  Volume = {393},
  Number = {6684},
  Pages = {440--442},
  Year = {1998},
  Publisher = {Nature Publishing Group}
}

% Initials are separated so BibTeX parses two given-name tokens; the journal name matches
% the spelling used by myers2003software elsewhere in this file.
@Article{ newman2001prefattach,
  Title = {Clustering and preferential attachment in growing networks},
  Author = {Newman, Mark E. J.},
  Journal = {Physical Review E},
  Volume = {64},
  Number = {2},
  Pages = {025102},
  Year = {2001},
  Publisher = {APS}
}

@Article{ scarselli2008gnn,
  Title = {The graph neural network model},
  Author = {Scarselli, Franco and Gori, Marco and Tsoi, Ah Chung and Hagenbuchner, Markus and Monfardini, Gabriele},
  Journal = {IEEE Transactions on Neural Networks},
  Volume = {20},
  Number = {1},
  Pages = {61--80},
  Year = {2008},
  Publisher = {IEEE}
}

@InProceedings{ vasilescu2015quality,
  Author = {Bogdan Vasilescu and Yue Yu and Huaimin Wang and Premkumar T.
Devanbu and Vladimir Filkov},
  Title = {Quality and productivity outcomes relating to continuous integration in {GitHub}},
  BookTitle = {Proceedings of the 2015 10th Joint Meeting on Foundations of Software Engineering, {ESEC/FSE} 2015, Bergamo, Italy, August 30 - September 4, 2015},
  Pages = {805--816},
  Year = {2015},
  CrossRef = {DBLP:conf/sigsoft/2015},
  URL = {https://doi.org/10.1145/2786805.2786850},
  DOI = {10.1145/2786805.2786850},
  timestamp = {Sat, 19 Oct 2019 20:33:08 +0200},
  biburl = {https://dblp.org/rec/conf/sigsoft/VasilescuYWDF15.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}

% Parent proceedings for the CrossRef in the entry above; it stays after its child entry.
@Proceedings{ DBLP:conf/sigsoft/2015,
  Editor = {Elisabetta Di Nitto and Mark Harman and Patrick Heymans},
  Title = {Proceedings of the 2015 10th Joint Meeting on Foundations of Software Engineering, {ESEC/FSE} 2015, Bergamo, Italy, August 30 - September 4, 2015},
  Publisher = {{ACM}},
  Year = {2015},
  URL = {https://doi.org/10.1145/2786805},
  DOI = {10.1145/2786805},
  ISBN = {978-1-4503-3675-8},
  timestamp = {Tue, 06 Nov 2018 16:59:22 +0100},
  biburl = {https://dblp.org/rec/conf/sigsoft/2015.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}

@InProceedings{ gu2018deepcodesearch,
  Author = {Xiaodong Gu and Hongyu Zhang and Sunghun Kim},
  Title = {Deep code search},
  BookTitle = {Proceedings of the 40th International Conference on Software Engineering, {ICSE} 2018, Gothenburg, Sweden, May 27 - June 03, 2018},
  Pages = {933--944},
  Year = {2018},
  Publisher = {{ACM}},
  URL = {https://doi.org/10.1145/3180155.3180167},
  DOI = {10.1145/3180155.3180167},
  timestamp = {Wed, 16 Oct 2019 14:14:49 +0200},
  biburl = {https://dblp.org/rec/conf/icse/GuZ018.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}

@Article{ Lamprecht2019,
  Author = {Lamprecht, Anna-Lena and Garcia, Leyla and Kuzak, Mateusz and Martinez, Carlos and Arcila, Ricardo and Martin Del Pico, Eva and Dominguez Del Angel, Victoria and van de Sandt, Stephanie and Ison, Jon and
Martinez, Paula Andrea and McQuilton, Peter and Valencia, Alfonso and Harrow, Jennifer and Psomopoulos, Fotis and Gelpi, Josep Ll and Chue Hong, Neil and Goble, Carole and Capella-Gutierrez, Salvador},
  Title = {Towards {FAIR} principles for research software},
  Journal = {Data Science},
  Year = {2019},
  Publisher = {IOS Press},
  Volume = {Preprint},
  Pages = {1--23},
  Keywords = {reproducible research},
  x-review-note = {Journal added because an Article entry requires one; the name is inferred from this entry's ISSN 2451-8492 and DOI prefix 10.3233/DS -- confirm volume and pages against the published version},
  Abstract = {The FAIR Guiding Principles, published in 2016, aim to improve the findability, accessibility, interoperability and reusability of digital research objects for both humans and machines. Until now the FAIR principles have been mostly applied to research data. The ideas behind these principles are, however, also directly relevant to research software. Hence there is a distinct need to explore how the FAIR principles can be applied to software. In this work, we aim to summarize the current status of the debate around FAIR and software, as basis for the development of community-agreed principles for FAIR research software in the future. We discuss what makes software different from data with regard to the application of the FAIR principles, and which desired characteristics of research software go beyond FAIR. Then we present an analysis of where the existing principles can directly be applied to software, where they need to be adapted or reinterpreted, and where the definition of additional principles is required. Here interoperability has proven to be the most challenging principle, calling for particular attention in future discussions.
Finally, we outline next steps on the way towards definite FAIR principles for research software.},
  Note = {Preprint},
  ISSN = {2451-8492},
  DOI = {10.3233/DS-190026}
}

@Unpublished{ dicosmo:hal-02475835,
  Title = {{Curated Archiving of Research Software Artifacts: lessons learned from the French open archive (HAL)}},
  Author = {Di Cosmo, Roberto and Gruenpeter, Morane and Marmol, Bruno P and Monteil, Alain and Romary, Laurent and Sadowska, Jozefina},
  URL = {https://hal.inria.fr/hal-02475835},
  Note = {Presented at the International Digital Curation Conference, submitted to IJDC},
  Year = 2019,
  Month = dec,
  Keywords = {Software Heritage ; HAL ; CCSD ; IES Inria ; legacy ; preservation ; reproducibility ; software curation ; software metadata ; PID},
  PDF = {https://hal.inria.fr/hal-02475835/file/ijdc-HAL-CCSD-SWH-2019-12-16.pdf},
  hal_id = {hal-02475835},
  hal_version = {v1}
}

% The service name is brace-protected: sentence-casing would otherwise turn it into "swmath".
@InProceedings{ swmath,
  Author = {Sebastian B{\"{o}}nisch and Michael Brickenstein and Hagen Chrapary and Gert{-}Martin Greuel and Wolfram Sperber},
  Title = {{swMATH} - {A} New Information Service for Mathematical Software},
  BookTitle = {MKM/Calculemus/DML},
  Series = {Lecture Notes in Computer Science},
  Volume = {7961},
  Pages = {369--373},
  Publisher = {Springer},
  Year = {2013}
}

@Misc{ SPDXLicences,
  Author = {{SPDX Workgroup}},
  Title = {Software Package Data Exchange Licence List},
  URL = {https://spdx.org/license-list},
  Year = {2019},
  Note = {\url{https://spdx.org/license-list}, retrieved 30 March 2020}
}

@Unpublished{ swh-archive-guide,
  Title = {{How to use Software Heritage for archiving and referencing your source code: guidelines and walkthrough}},
  Author = {Di Cosmo, Roberto},
  URL = {https://hal.archives-ouvertes.fr/hal-02263344},
  Note = {Available at \url{https://hal.archives-ouvertes.fr/hal-02263344}},
  Year = {2019},
  Month = apr,
  Keywords = {source code ; software archival ; reproducibility ; open science ; guidelines},
  PDF =
{https://hal.archives-ouvertes.fr/hal-02263344/file/swh-archive-reference-howto.pdf},
  hal_id = {hal-02263344},
  hal_version = {v1}
}

% Pages: the hash sign is escaped, because a bare one in the typeset bibliography is a LaTeX error.
% All values are brace-delimited, and the abstract uses TeX quotes instead of Unicode curly quotes.
@Article{ DaneluttoDiCosmo2020,
  Author = {Di Cosmo, Roberto and Danelutto, Marco},
  Title = {{[Rp] Reproducing and replicating the OCamlP3l experiment}},
  Journal = {ReScience C},
  Year = {2020},
  Month = apr,
  Volume = {6},
  Number = {1},
  Pages = {{\#2}},
  DOI = {10.5281/zenodo.3763416},
  URL = {https://zenodo.org/record/3763416/files/article.pdf},
  urllocal = {http://www.dicosmo.org/Articles/2020-ReScienceC.pdf},
  code_url = {https://archive.softwareheritage.org/swh:1:rev:2db189928c94d62a3b4757b3eec68f0a4d4113f0;origin=https://gitorious.org/ocamlp3l/ocamlp3l_cvs.git/},
  code_swh = {swh:1:rev:2db189928c94d62a3b4757b3eec68f0a4d4113f0;origin=https://gitorious.org/ocamlp3l/ocamlp3l_cvs.git/},
  review_url = {https://github.com/ReScience/submissions/issues/22},
  Type = {Reproduction},
  Language = {OCaml},
  domain = {parallel programming},
  dmi-category = {jour},
  x-topic = {opensc},
  Keywords = {rescience c},
  Abstract = {This article provides a full report on the effort to reproduce the work described in the article ``Parallel Functional Programming with Skeletons: the OCamlP3L experiment'', written in 1998. It presented OCamlP3L, a parallel programming system written in the OCaml programming language.
It turns out that we found the source code of the OCamlP3L system only in Software Heritage: since it was saved with all its development history, we could perform this reproduction experiment.}
}

% Corporate author: double braces keep the organisation name from being split into given/family parts.
@Online{ swhids,
  Author = {{Software Heritage}},
  Title = {{SoftWare} {Heritage} persistent {IDentifiers} ({SWHIDs})},
  Date = {2020-05-05},
  URL = {https://docs.softwareheritage.org/devel/swh-model/persistent-identifiers.html},
  Version = {1.4}
}

@InProceedings{ swhicms2020,
  Title = {{Archiving and referencing source code with Software Heritage}},
  Author = {Di Cosmo, Roberto},
  URL = {https://hal.archives-ouvertes.fr/hal-02526083},
  Note = {to appear, preprint available},
  Year = {2020},
  PDF = {https://hal.archives-ouvertes.fr/hal-02526083/file/main.pdf},
  BookTitle = {{ICMS}},
  Series = {Lecture Notes in Computer Science},
  Publisher = {Springer}
}

% Author list uses the unambiguous "Last, First" form throughout.
@Article{ IJDC-conf-2020,
  DOI = {10.2218/ijdc.v15i1.698},
  URL = {https://doi.org/10.2218/ijdc.v15i1.698},
  Year = {2020},
  Month = aug,
  Publisher = {Edinburgh University Library},
  Volume = {15},
  Number = {1},
  Pages = {16},
  Author = {Di Cosmo, Roberto and Gruenpeter, Morane and Marmol, Bruno and Monteil, Alain and Romary, Laurent and Sadowska, Jozefina},
  Title = {Curated Archiving of Research Software Artifacts: Lessons Learned from the French Open Archive ({HAL})},
  Journal = {International Journal of Digital Curation}
}

@TechReport{ gplo-note-2020,
  Title = {{Encouraging a wider usage of software derived from research}},
  Author = {Cl{\'e}ment-Fontaine, M{\'e}lanie and Di Cosmo, Roberto and Guerry, Bastien and Moreau, Patrick and Pellegrini, Fran{\c c}ois},
  URL = {https://hal.archives-ouvertes.fr/hal-02545142},
  Type = {Research Report},
  Institution = {{Committee for Open Science's Free Software and Open Source Project Group}},
  Year = {2019},
  Month = nov,
  PDF = {https://hal.archives-ouvertes.fr/hal-02545142/file/Opportunity%20Note_software%20derived%20from%20research_EN%20pm.pdf},
  hal_id = {hal-02545142},
  hal_version = {v1}
}

@InProceedings{
rodriguez2015gremlin,
  Title = {The {Gremlin} graph traversal machine and language (invited talk)},
  Author = {Rodriguez, Marko A.},
  BookTitle = {Proceedings of the 15th Symposium on Database Programming Languages},
  Pages = {1--10},
  Year = {2015}
}

% Page ranges below use the double-hyphen form.
@Article{ ieee-sw-gender-swh,
  Author = {Stefano Zacchiroli},
  Title = {Gender Differences in Public Code Contributions: a 50-year Perspective},
  Publisher = {IEEE Computer Society},
  Year = {2021},
  ISSN = {0740-7459},
  DOI = {10.1109/MS.2020.3038765},
  Pages = {45--50},
  Volume = {38},
  Number = {2},
  Journal = {IEEE Software}
}

@Article{ ieee-computer-continuous-compliance,
  Author = {Simon Phipps and Stefano Zacchiroli},
  Title = {Continuous Open Source License Compliance},
  Publisher = {IEEE Computer Society},
  Year = {2020},
  ISSN = {0018-9162},
  DOI = {10.1109/MC.2020.3024403},
  Pages = {115--119},
  Volume = {53},
  Number = {12},
  Journal = {IEEE Computer}
}

% The cedilla is written as a LaTeX escape, like the other accented names in this file.
@InProceedings{ swh-fuse-icse2021,
  Author = {Thibault Allan{\c{c}}on and Antoine Pietri and Stefano Zacchiroli},
  Title = {The {Software Heritage} Filesystem ({SwhFS}): Integrating Source Code Archival with Development},
  Publisher = {IEEE},
  Year = {2021},
  BookTitle = {ICSE 2021: The 43rd International Conference on Software Engineering}
}

@InProceedings{ file-type-detection-swh,
  Author = {Del Bonifro, Francesca and Maurizio Gabbrielli and Stefano Zacchiroli},
  Title = {Content-Based Textual File Type Detection at Scale},
  Publisher = {ACM},
  Year = {2021},
  BookTitle = {ICMLC 2021: The 13th International Conference on Machine Learning and Computing}
}

@Article{ singh2010smallworldcollab,
  Author = {Param Vir Singh},
  Title = {The small-world effect: The influence of macro-level properties of developer collaboration networks on open-source project success},
  Journal = {{ACM} Trans. Softw. Eng.
Methodol.},
  Volume = {20},
  Number = {2},
  Pages = {6:1--6:27},
  Year = {2010},
  URL = {https://doi.org/10.1145/1824760.1824763},
  DOI = {10.1145/1824760.1824763},
  timestamp = {Tue, 06 Nov 2018 12:51:20 +0100},
  biburl = {https://dblp.org/rec/journals/tosem/Singh10.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}

% The entry above (singh2010smallworldcollab) is defined exactly once: a second, identical
% copy under the same key used to follow mockus2020complete, and BibTeX reports a repeated
% key as an error.
@InProceedings{ mockus2020complete,
  Author = {Audris Mockus and Diomidis Spinellis and Zoe Kotti and Gabriel John Dusing},
  Title = {A Complete Set of Related {Git} Repositories Identified via Community Detection Approaches Based on Shared Commits},
  BookTitle = {{MSR} '20: 17th International Conference on Mining Software Repositories},
  Pages = {513--517},
  Publisher = {{ACM}},
  Year = {2020},
  URL = {https://doi.org/10.1145/3379597.3387499},
  DOI = {10.1145/3379597.3387499},
  timestamp = {Tue, 29 Dec 2020 18:36:57 +0100},
  biburl = {https://dblp.org/rec/conf/msr/MockusSKD20.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}

% The arXiv identifier is also exposed through the eprint fields; Journal is kept so that
% styles without eprint support still print where the preprint lives.
@Article{ valverde2003hierarchical,
  Title = {Hierarchical small worlds in software architecture},
  Author = {Valverde, Sergi and Sol{\'e}, Ricard V.},
  Journal = {arXiv preprint cond-mat/0307278},
  Year = {2003},
  ArchivePrefix = {arXiv},
  EPrint = {cond-mat/0307278}
}

@Article{ myers2003software,
  Title = {Software systems as complex networks: Structure, function, and evolvability of software collaboration graphs},
  Author = {Myers, Christopher R.},
  Journal = {Physical Review E},
  Volume = {68},
  Number = {4},
  Pages = {046116},
  Year = {2003},
  Publisher = {APS}
}

@InProceedings{ hassan2004revengsmallworld,
  Author = {Ahmed E. Hassan and Richard C. Holt},
  Title = {The Small World of Software Reverse Engineering},
  BookTitle = {11th Working Conference on Reverse Engineering, {WCRE} 2004},
  Pages = {278--283},
  Publisher = {{IEEE} Computer Society},
  Year = {2004},
  URL = {https://doi.org/10.1109/WCRE.2004.37},
  DOI = {10.1109/WCRE.2004.37},
  timestamp = {Wed, 16 Oct 2019 14:14:53 +0200},
  biburl = {https://dblp.org/rec/conf/wcre/HassanH04.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}

@Book{ demeyer2008softwareevolution,
  Title = {Software Evolution},
  Author = {Demeyer, Serge and Mens, Tom},
  Year = {2008},
  Publisher = {Springer}
}

% Month uses the standard macro. The link and access date sit in URL and Note, because
% HowPublished is only meant for Misc and Booklet entries.
@Article{ generalindex2021naturenews,
  Author = {Else, Holly},
  Title = {Giant, free index to world's research papers released online},
  Journal = {Nature},
  Year = {2021},
  Month = oct,
  Publisher = {Nature Publishing Group},
  DOI = {10.1038/d41586-021-02895-8},
  URL = {https://www.nature.com/articles/d41586-021-02895-8},
  Note = {Accessed 2021-12-15}
}

@InProceedings{ ley2002dblp,
  Author = {Michael Ley},
  Editor = {Alberto H. F.
Laender and Arlindo L. Oliveira},
  Title = {The {DBLP} Computer Science Bibliography: Evolution, Research Issues, Perspectives},
  BookTitle = {String Processing and Information Retrieval, 9th International Symposium, {SPIRE} 2002, Lisbon, Portugal, September 11-13, 2002, Proceedings},
  Series = {Lecture Notes in Computer Science},
  Volume = {2476},
  Pages = {1--10},
  Publisher = {Springer},
  Year = {2002},
  URL = {https://doi.org/10.1007/3-540-45735-6\_1},
  DOI = {10.1007/3-540-45735-6\_1},
  timestamp = {Wed, 29 May 2019 10:39:45 +0200},
  biburl = {https://dblp.org/rec/conf/spire/Ley02.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}

@Article{ stonebraker1991postgres,
  Author = {Michael Stonebraker and Greg Kemnitz},
  Title = {The {Postgres} Next Generation Database Management System},
  Journal = {Commun. {ACM}},
  Volume = {34},
  Number = {10},
  Pages = {78--92},
  Year = {1991},
  URL = {https://doi.org/10.1145/125223.125262},
  DOI = {10.1145/125223.125262},
  timestamp = {Tue, 06 Nov 2018 12:51:34 +0100},
  biburl = {https://dblp.org/rec/journals/cacm/StonebrakerK91.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}

% No Volume, Number or Pages here: the source gave them as empty values, which only
% produce empty-field warnings. Add them once the final publication data is known.
@Article{ menzies2018swetrends,
  Author = {Mathew, George and Agrawal, Amritanshu and Menzies, Tim},
  Journal = {{IEEE} Transactions on Software Engineering},
  Title = {Finding Trends in Software Research},
  Year = {2018},
  Note = {To appear},
  DOI = {10.1109/TSE.2018.2870388}
}

@InProceedings{ sun2016topicmodels,
  Author = {Xiaobing Sun and Xiangyue Liu and Bin Li and Yucong Duan and Hui Yang and Jiajun Hu},
  Editor = {Yihai Chen},
  Title = {Exploring topic models in software engineering data analysis: {A} survey},
  BookTitle = {17th {IEEE/ACIS} International Conference on Software Engineering, Artificial Intelligence, Networking and Parallel/Distributed Computing, {SNPD} 2016, Shanghai, China, May 30 - June 1, 2016},
  Pages = {357--362},
  Publisher = {{IEEE} Computer Society},
  Year = {2016},
  URL =
{https://doi.org/10.1109/SNPD.2016.7515925},
  DOI = {10.1109/SNPD.2016.7515925},
  timestamp = {Tue, 14 Dec 2021 16:33:32 +0100},
  biburl = {https://dblp.org/rec/conf/snpd/SunLLDYH16.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}

% Empty Volume and Number placeholders are omitted; the page range uses a double hyphen.
@InProceedings{ sahito2019swengpubtrends,
  Author = {Sahito, Sanam Fayaz and Gilal, Abdul Rehman and Abro, Rizwan Ali and Waqas, Ahmad and Shaikh, Khisaluddin},
  BookTitle = {2019 13th International Conference on Mathematics, Actuarial Science, Computer Science and Statistics (MACS)},
  Title = {Research Publication Trends in Software Engineering},
  Year = {2019},
  Pages = {1--4},
  DOI = {10.1109/MACS48846.2019.9024767}
}

@InProceedings{ demeyer2013msrtrends,
  Author = {Serge Demeyer and Alessandro Murgia and Kevin Wyckmans and Ahmed Lamkanfi},
  Editor = {Thomas Zimmermann and Massimiliano Di Penta and Sunghun Kim},
  Title = {Happy birthday! a trend analysis on past {MSR} papers},
  BookTitle = {Proceedings of the 10th Working Conference on Mining Software Repositories, {MSR} '13, San Francisco, CA, USA, May 18-19, 2013},
  Pages = {353--362},
  Publisher = {{IEEE} Computer Society},
  Year = {2013},
  URL = {https://doi.org/10.1109/MSR.2013.6624049},
  DOI = {10.1109/MSR.2013.6624049},
  timestamp = {Wed, 16 Oct 2019 14:14:52 +0200},
  biburl = {https://dblp.org/rec/conf/msr/DemeyerMWL13.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}

@InProceedings{ farias2016msrmapping,
  Author = {M{\'{a}}rio Andr{\'{e}} de Freitas Farias and Renato Lima Novais and Methanias Cola{\c{c}}o J{\'{u}}nior and Luis Paulo da Silva Carvalho and Manoel G.
Mendon{\c{c}}a and Rodrigo Oliveira Sp{\'{\i}}nola},
  Editor = {Sascha Ossowski},
  Title = {A systematic mapping study on mining software repositories},
  BookTitle = {Proceedings of the 31st Annual {ACM} Symposium on Applied Computing, Pisa, Italy, April 4-8, 2016},
  Pages = {1472--1479},
  Publisher = {{ACM}},
  Year = {2016},
  URL = {https://doi.org/10.1145/2851613.2851786},
  DOI = {10.1145/2851613.2851786},
  timestamp = {Sun, 25 Oct 2020 22:54:24 +0100},
  biburl = {https://dblp.org/rec/conf/sac/FariasNJCMS16.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}

% Months below use the unquoted three-letter macros so each style can render them its own way.
@Article{ gurcan2020distancelearningtrends,
  Author = {Gurcan, Fatih and Cagiltay, Nergiz Ercil},
  Title = {{Research trends on distance learning: a text mining-based literature review from 2008 to 2018}},
  Journal = {Interactive Learning Environments},
  Pages = {1--22},
  Year = {2020},
  Month = sep,
  ISSN = {1049-4820},
  Publisher = {Routledge},
  DOI = {10.1080/10494820.2020.1815795}
}

% Pages carries the article number, which is the final component of the DOI.
@Article{ ioannidis2015metaresearch,
  Author = {Ioannidis, John P. A. and Fanelli, Daniele and Dunne, Debbie Drake and Goodman, Steven N.},
  Title = {Meta-research: Evaluation and Improvement of Research Methods and Practices},
  Journal = {PLoS Biol.},
  Volume = {13},
  Number = {10},
  Pages = {e1002264},
  Year = {2015},
  Month = oct,
  Publisher = {Public Library of Science},
  DOI = {10.1371/journal.pbio.1002264}
}

@Article{ ioannidis2010metaresearch,
  Author = {Ioannidis, John P. A.},
  Title = {Meta-research: The art of getting it wrong},
  Journal = {Res. Synth. Methods},
  Volume = {1},
  Number = {3--4},
  Pages = {169--184},
  Year = {2010},
  Month = jul,
  ISSN = {1759-2879},
  Publisher = {John Wiley {\&} Sons, Ltd},
  DOI = {10.1002/jrsm.19}
}

% Same paper as 2004-dyba-ese near the top of this file; both keys are kept because
% either may be cited.
@InProceedings{ kitchenham2004ebse,
  Author = {Barbara A. Kitchenham and Tore Dyb{\aa} and Magne J{\o}rgensen},
  Editor = {Anthony Finkelstein and Jacky Estublier and David S.
Rosenblum},
  Title = {Evidence-Based Software Engineering},
  BookTitle = {26th International Conference on Software Engineering {(ICSE} 2004), 23-28 May 2004, Edinburgh, United Kingdom},
  Pages = {273--281},
  Publisher = {{IEEE} Computer Society},
  Year = {2004},
  URL = {https://doi.org/10.1109/ICSE.2004.1317449},
  DOI = {10.1109/ICSE.2004.1317449},
  timestamp = {Wed, 16 Oct 2019 14:14:49 +0200},
  biburl = {https://dblp.org/rec/conf/icse/KitchenhamDJ04.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}

@Article{ kitchenham2009slrslr,
  Author = {Barbara A. Kitchenham and Pearl Brereton and David Budgen and Mark Turner and John Bailey and Stephen G. Linkman},
  Title = {Systematic literature reviews in software engineering - {A} systematic literature review},
  Journal = {Inf. Softw. Technol.},
  Volume = {51},
  Number = {1},
  Pages = {7--15},
  Year = {2009},
  URL = {https://doi.org/10.1016/j.infsof.2008.09.009},
  DOI = {10.1016/j.infsof.2008.09.009},
  timestamp = {Mon, 14 Sep 2020 16:49:35 +0200},
  biburl = {https://dblp.org/rec/journals/infsof/KitchenhamBBTBL09.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}

% Month uses the standard macro. Pages carries the article number taken from the DOI suffix;
% Volume is the journal's yearly volume (TODO confirm against the publisher page).
@Article{ huang2020openaccess,
  Author = {Huang, Chun-Kai (Karl) and Neylon, Cameron and Hosking, Richard and Montgomery, Lucy and Wilson, Katie S. and Ozaygen, Alkim and Brookes-Kenworthy, Chloe},
  Title = {Meta-Research: Evaluating the impact of open access policies on research institutions},
  Journal = {eLife},
  Volume = {9},
  Pages = {e57067},
  Year = {2020},
  Month = sep,
  Publisher = {eLife Sciences Publications, Ltd},
  DOI = {10.7554/eLife.57067}
}

@Article{ pina2021mariecurie,
  Author = {Pina, David G.
and Buljan, Ivan and Hren, Darko and Maru{\v{s}}i{\'{c}}, Ana},
  Title = {Meta-Research: A retrospective analysis of the peer review of more than 75,000 {Marie Curie} proposals between 2007 and 2018},
  Journal = {eLife},
  Volume = {10},
  Pages = {e59338},
  Year = {2021},
  Month = jan,
  Publisher = {eLife Sciences Publications, Ltd},
  DOI = {10.7554/eLife.59338}
}

% Accents are plain text-mode escapes (author names are never typeset in math mode), months
% use the standard macros, and the eLife entries carry their article number, taken from the
% DOI suffix, in Pages. Volume is the journal's yearly volume (TODO confirm).
@Article{ alperin2019public,
  Author = {Alperin, Juan P. and Nieves, Carol Mu{\~{n}}oz and Schimanski, Lesley A. and Fischman, Gustavo E. and Niles, Meredith T. and McKiernan, Erin C.},
  Title = {Meta-Research: How significant are the public dimensions of faculty work in review, promotion and tenure documents?},
  Journal = {eLife},
  Volume = {8},
  Pages = {e42254},
  Year = {2019},
  Month = feb,
  Publisher = {eLife Sciences Publications, Ltd},
  DOI = {10.7554/eLife.42254}
}

@Article{ andersen2020women,
  Author = {Andersen, Jens Peter and Nielsen, Mathias Wullum and Simone, Nicole L. and Lewiss, Resa E. and Jagsi, Reshma},
  Title = {Meta-Research: {COVID-19} medical papers have fewer women first authors than expected},
  Journal = {eLife},
  Volume = {9},
  Pages = {e58807},
  Year = {2020},
  Month = jun,
  Publisher = {eLife Sciences Publications, Ltd},
  DOI = {10.7554/eLife.58807}
}

@Article{ suber2003openaccess,
  Title = {Removing the barriers to research: an introduction to open access for librarians},
  Author = {Suber, Peter},
  Journal = {College \& research libraries news},
  Year = {2003},
  Pages = {92--94},
  Volume = {64},
  Publisher = {Association of College and Research Libraries},
  Note = {available at \url{https://dash.harvard.edu/bitstream/handle/1/3715477/suber_crln.html}}
}

@Article{ vine2006googlescholar,
  Author = {Vine, Rita},
  Title = {{Google Scholar}},
  Journal = {J. Med. Libr. Assoc.},
  Volume = {94},
  Number = {1},
  Pages = {97},
  Year = {2006},
  Month = jan,
  Publisher = {Medical Library Association},
  URL = {https://www.ncbi.nlm.nih.gov/pmc/articles/PMC1324783}
}

@Article{ halevi2017googlescholar,
  Author = {Gali Halevi and Henk F.
Moed and Judit Bar{-}Ilan},
  Title = {Suitability of {Google Scholar} as a source of scientific information and as a source of data for scientific evaluation - Review of the Literature},
  Journal = {J. Informetrics},
  Volume = {11},
  Number = {3},
  Pages = {823--834},
  Year = {2017},
  URL = {https://doi.org/10.1016/j.joi.2017.06.005},
  DOI = {10.1016/j.joi.2017.06.005},
  timestamp = {Fri, 30 Nov 2018 13:19:33 +0100},
  biburl = {https://dblp.org/rec/journals/joi/HaleviMB17.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}

@InProceedings{ vasilescu2013sweconf,
  Author = {Bogdan Vasilescu and Alexander Serebrenik and Tom Mens},
  Editor = {Thomas Zimmermann and Massimiliano Di Penta and Sunghun Kim},
  Title = {A historical dataset of software engineering conferences},
  BookTitle = {Proceedings of the 10th Working Conference on Mining Software Repositories, {MSR} '13, San Francisco, CA, USA, May 18-19, 2013},
  Pages = {373--376},
  Publisher = {{IEEE} Computer Society},
  Year = {2013},
  URL = {https://doi.org/10.1109/MSR.2013.6624051},
  DOI = {10.1109/MSR.2013.6624051},
  timestamp = {Sun, 25 Oct 2020 23:02:32 +0100},
  biburl = {https://dblp.org/rec/conf/msr/VasilescuSM13.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}

% Journal name is spelled in full, matching prana2019readme elsewhere in this file.
@Article{ kotti2020msrdatapapers,
  Author = {Zoe Kotti and Konstantinos Kravvaritis and Konstantina Dritsa and Diomidis Spinellis},
  Title = {Standing on shoulders or feet? An extended study on the usage of the {MSR} data papers},
  Journal = {Empirical Software Engineering},
  Volume = {25},
  Number = {5},
  Pages = {3288--3322},
  Year = {2020},
  URL = {https://doi.org/10.1007/s10664-020-09834-7},
  DOI = {10.1007/s10664-020-09834-7},
  timestamp = {Mon, 26 Oct 2020 08:54:06 +0100},
  biburl = {https://dblp.org/rec/journals/ese/KottiKDS20.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}

@InProceedings{ tkaczyk2018machine,
  Title = {Machine learning vs. rules and out-of-the-box vs.
retrained: An evaluation of open-source bibliographic reference and citation parsers}, Author = {Tkaczyk, Dominika and Collins, Andrew and Sheridan, Paraic and Beel, Joeran}, BookTitle = {Proceedings of the 18th ACM/IEEE on joint conference on digital libraries}, Pages = {99--108}, Year = {2018} } @Article{ tkaczyk2015cermine, Author = {Dominika Tkaczyk and Pawel Szostek and Mateusz Fedoryszak and Piotr Jan Dendek and Lukasz Bolikowski}, Title = {{CERMINE:} automatic extraction of structured metadata from scientific literature}, Journal = {Int. J. Document Anal. Recognit.}, Volume = {18}, Number = {4}, Pages = {317--335}, Year = {2015}, URL = {https://doi.org/10.1007/s10032-015-0249-8}, DOI = {10.1007/s10032-015-0249-8}, timestamp = {Thu, 13 Aug 2020 12:42:15 +0200}, biburl = {https://dblp.org/rec/journals/ijdar/TkaczykSFDB15.bib}, bibsource = {dblp computer science bibliography, https://dblp.org} } @Article{ santamaria2018genderapi, Author = {Luc{\'{\i}}a Santamar{\'{\i}}a and Helena Mihaljevic}, Title = {Comparison and benchmark of name-to-gender inference services}, Journal = {PeerJ Computer Science}, Volume = {4}, Pages = {e156}, Year = {2018}, URL = {https://doi.org/10.7717/peerj-cs.156}, DOI = {10.7717/peerj-cs.156}, timestamp = {Sat, 19 Oct 2019 19:13:33 +0200}, biburl = {https://dblp.org/rec/journals/peerj-cs/SantamariaM18.bib}, bibsource = {dblp computer science bibliography, https://dblp.org} } @Misc{ ishida2011namesaroundtheworld, Author = {Richard Ishida}, Title = {Personal names around the world}, Institution = {{W3C}}, Year = {2011}, HowPublished = {\url{https://www.w3.org/International/questions/qa-personal-names}} } @Article{ terrell2017gender, Title = {Gender differences and bias in open source: Pull request acceptance of women versus men}, Author = {Terrell, Josh and Kofink, Andrew and Middleton, Justin and Rainear, Clarissa and Murphy-Hill, Emerson and Parnin, Chris and Stallings, Jon}, Journal = {PeerJ Computer Science}, Volume = {3}, Pages = 
{e111}, Year = {2017}, Publisher = {PeerJ Inc.} } @Article{ nafus2012patches, Title = {Patches don't have gender: What is not open in open source software}, Author = {Nafus, Dawn}, Journal = {New Media \& Society}, Volume = {14}, Number = {4}, Pages = {669--683}, Year = {2012}, Publisher = {Sage Publications Sage UK: London, England} } @Article{ vasilescu2014gender, Title = {Gender, representation and online participation: A quantitative study}, Author = {Vasilescu, Bogdan and Capiluppi, Andrea and Serebrenik, Alexander}, Journal = {Interacting with Computers}, Volume = {26}, Number = {5}, Pages = {488--511}, Year = {2014}, Publisher = {Oxford University Press} } @InProceedings{ vasilescu2015gender, Title = {Gender and tenure diversity in {GitHub} teams}, Author = {Vasilescu, Bogdan and Posnett, Daryl and Ray, Baishakhi and van den Brand, Mark GJ and Serebrenik, Alexander and Devanbu, Premkumar and Filkov, Vladimir}, BookTitle = {33rd annual {ACM} conference on human factors in computing systems, {CHI}'15}, Pages = {3789--3798}, Year = {2015} } @InProceedings{ robles2014surveydataset, Author = {Gregorio Robles and Laura {Arjona Reina} and Alexander Serebrenik and Bogdan Vasilescu and Jes{\'{u}}s M. Gonz{\'{a}}lez{-}Barahona}, Editor = {Premkumar T. Devanbu and Sung Kim and Martin Pinzger}, Title = {{FLOSS} 2013: a survey dataset about free software contributors: challenges for curating, sharing, and combining}, BookTitle = {11th Working Conference on Mining Software Repositories, {MSR} 2014, Proceedings, May 31 - June 1, 2014, Hyderabad, India}, Pages = {396--399}, Publisher = {{ACM}}, Year = {2014}, URL = {https://doi.org/10.1145/2597073.2597129}, DOI = {10.1145/2597073.2597129}, timestamp = {Tue, 06 Nov 2018 16:57:14 +0100}, biburl = {https://dblp.org/rec/conf/msr/RoblesRSVG14.bib}, bibsource = {dblp computer science bibliography, https://dblp.org} } @InProceedings{ robles2016womeninfoss, Author = {Gregorio Robles and Laura {Arjona Reina} and Jes{\'{u}}s M. 
Gonz{\'{a}}lez{-}Barahona and Santiago Due{\~{n}}as Dom{\'{\i}}nguez}, Title = {Women in Free/Libre/Open Source Software: The Situation in the 2010s}, BookTitle = {12th International Conference on Open Source Systems, {OSS} 2016}, Series = {{IFIP} Advances in Information and Communication Technology}, Volume = {472}, Pages = {163--173}, Publisher = {Springer}, Year = {2016}, URL = {https://doi.org/10.1007/978-3-319-39225-7\_13}, DOI = {10.1007/978-3-319-39225-7\_13}, timestamp = {Sun, 02 Jun 2019 21:26:40 +0200}, biburl = {https://dblp.org/rec/conf/oss/RoblesRGD16.bib}, bibsource = {dblp computer science bibliography, https://dblp.org} } @Book{ hill2010whysofew, Title = {Why so few? Women in science, technology, engineering, and mathematics.}, Author = {Hill, Catherine and Corbett, Christianne and St Rose, Andresse}, Year = {2010}, Publisher = {ERIC} } @Book{ margolis2002womencs, Title = {Unlocking the clubhouse: Women in computing}, Author = {Margolis, Jane and Fisher, Allan}, Year = {2002}, Publisher = {MIT press} } @Article{ david2008fossdevs, Title = {Community-based production of open-source software: What do we know about the developers who participate?}, Author = {David, Paul A and Shapiro, Joseph S}, Journal = {Information Economics and Policy}, Volume = {20}, Number = {4}, Pages = {364--398}, Year = {2008}, Publisher = {Elsevier} } @InProceedings{ kuechler2012genderfoss, Author = {Victor Kuechler and Claire Gilbertson and Carlos Jensen}, Title = {Gender Differences in Early Free and Open Source Software Joining Process}, BookTitle = {8th International Conference on Open Source Systems, {OSS} 2012}, Series = {{IFIP} Advances in Information and Communication Technology}, Volume = {378}, Pages = {78--93}, Publisher = {Springer}, Year = {2012}, URL = {https://doi.org/10.1007/978-3-642-33442-9\_6}, DOI = {10.1007/978-3-642-33442-9\_6}, timestamp = {Tue, 26 Jun 2018 14:13:50 +0200}, biburl = {https://dblp.org/rec/conf/oss/KuechlerGJ12.bib}, bibsource = {dblp 
computer science bibliography, https://dblp.org} } @Article{ oneil2016debiansurvey, Author = {O'Neil, Mathieu and Mahin Raissi and Molly de Blanc and Stefano Zacchiroli}, Title = {Preliminary Report on the Influence of Capital in an Ethical-Modular Project: Quantitative data from the 2016 Debian Survey}, Abstract = {}, Year = {2017}, ISSN = {2213-5316}, Number = {10}, Journal = {Journal of Peer Production} } @InProceedings{ qiu2010kdewomen, Author = {Yixin Qiu and Katherine J. Stewart and Kathryn M. Bartol}, Title = {Joining and Socialization in Open Source Women's Groups: An Exploratory Study of \emph{KDE-Women}}, BookTitle = {6th International Conference on Open Source Systems, {OSS} 2010}, Series = {{IFIP} Advances in Information and Communication Technology}, Volume = {319}, Pages = {239--251}, Publisher = {Springer}, Year = {2010}, URL = {https://doi.org/10.1007/978-3-642-13244-5\_19}, DOI = {10.1007/978-3-642-13244-5\_19}, timestamp = {Thu, 12 Mar 2020 11:39:15 +0100}, biburl = {https://dblp.org/rec/conf/oss/QiuSB10.bib}, bibsource = {dblp computer science bibliography, https://dblp.org} } @Article{ boisvert2016acmbadges, Author = {Ronald F. Boisvert}, Title = {Incentivizing reproducibility}, Journal = {Commun. 
{ACM}}, Volume = {59}, Number = {10}, Pages = {5}, Year = {2016}, URL = {https://doi.org/10.1145/2994031}, DOI = {10.1145/2994031}, timestamp = {Tue, 06 Nov 2018 12:51:40 +0100}, biburl = {https://dblp.org/rec/journals/cacm/Boisvert16.bib}, bibsource = {dblp computer science bibliography, https://dblp.org} } @Article{ nagappan2009msr, Author = {Nachiappan Nagappan and Andreas Zeller and Thomas Zimmermann}, Title = {Guest Editors' Introduction: Mining Software Archives}, Journal = {{IEEE} Softw.}, Volume = {26}, Number = {1}, Pages = {24--25}, Year = {2009}, URL = {https://doi.org/10.1109/MS.2009.14}, DOI = {10.1109/MS.2009.14}, timestamp = {Mon, 08 Jun 2020 22:31:17 +0200}, biburl = {https://dblp.org/rec/journals/software/NagappanZZ09.bib}, bibsource = {dblp computer science bibliography, https://dblp.org} } @Book{ felderer2020esebook, Editor = {Michael Felderer and Guilherme Horta Travassos}, Title = {Contemporary Empirical Methods in Software Engineering}, Publisher = {Springer}, Year = {2020}, URL = {https://doi.org/10.1007/978-3-030-32489-6}, DOI = {10.1007/978-3-030-32489-6}, ISBN = {978-3-030-32488-9}, timestamp = {Fri, 26 Mar 2021 14:31:04 +0100}, biburl = {https://dblp.org/rec/books/sp/20/FT2020.bib}, bibsource = {dblp computer science bibliography, https://dblp.org} } @Book{ shull2008eseguide, Editor = {Forrest Shull and Janice Singer and Dag I. K. 
Sj{\o}berg}, Title = {Guide to Advanced Empirical Software Engineering}, Publisher = {Springer}, Year = {2008}, URL = {https://doi.org/10.1007/978-1-84800-044-5}, DOI = {10.1007/978-1-84800-044-5}, ISBN = {9781848000438}, timestamp = {Wed, 07 Aug 2019 15:04:08 +0200}, biburl = {https://dblp.org/rec/books/sp/08/SSS2008.bib}, bibsource = {dblp computer science bibliography, https://dblp.org} } @Article{ basili1999building, Title = {Building knowledge through families of experiments}, Author = {Basili, Victor R and Shull, Forrest and Lanubile, Filippo}, Journal = {IEEE Transactions on Software Engineering}, Volume = {25}, Number = {4}, Pages = {456--473}, Year = {1999}, Publisher = {IEEE} } @Article{ runeson2009guidelines, Title = {Guidelines for conducting and reporting case study research in software engineering}, Author = {Runeson, Per and H{\"o}st, Martin}, Journal = {Empirical Software Engineering}, Volume = {14}, Number = {2}, Pages = {131}, Year = {2009}, Publisher = {Springer} } @Article{ novais2013software, Title = {Software evolution visualization: A systematic mapping study}, Author = {Novais, Renato Lima and Torres, Andr{\'e} and Mendes, Thiago Souto and Mendon{\c{c}}a, Manoel and Zazworka, Nico}, Journal = {Information and Software Technology}, Volume = {55}, Number = {11}, Pages = {1860--1883}, Year = {2013}, Publisher = {Elsevier} } @Article{ kettunen2005stem, Title = {To stem or lemmatize a highly inflectional language in a probabilistic {IR} environment?}, Author = {Kettunen, Kimmo and Kunttu, Tuomas and J{\"a}rvelin, Kalervo}, Journal = {Journal of Documentation}, Year = {2005}, Publisher = {Emerald Group Publishing Limited} } @Article{ nazar2016summarizing, Title = {Summarizing software artifacts: A literature review}, Author = {Nazar, Najam and Hu, Yan and Jiang, He}, Journal = {Journal of Computer Science and Technology}, Volume = {31}, Number = {5}, Pages = {883--909}, Year = {2016}, Publisher = {Springer} } @Article{ soper2012ngram, Title = {An 
n-gram analysis of Communications 2000--2010}, Author = {Soper, Daniel S and Turel, Ofir}, Journal = {Communications of the ACM}, Volume = {55}, Number = {5}, Pages = {81--87}, Year = {2012}, Publisher = {ACM New York, NY, USA} } @Article{ moro2015business, Title = {Business intelligence in banking: A literature analysis from 2002 to 2013 using text mining and latent Dirichlet allocation}, Author = {Moro, S{\'e}rgio and Cortez, Paulo and Rita, Paulo}, Journal = {Expert Systems with Applications}, Volume = {42}, Number = {3}, Pages = {1314--1324}, Year = {2015}, Publisher = {Elsevier} } @Article{ nasar2018information, Title = {Information extraction from scientific articles: a survey}, Author = {Nasar, Zara and Jaffry, Syed Waqar and Malik, Muhammad Kamran}, Journal = {Scientometrics}, Volume = {117}, Number = {3}, Pages = {1931--1990}, Year = {2018}, Publisher = {Springer} } @Article{ ioannidis2016citation, Title = {Citation metrics: a primer on how (not) to normalize}, Author = {Ioannidis, John PA and Boyack, Kevin and Wouters, Paul F}, Journal = {PLoS biology}, Volume = {14}, Number = {9}, Pages = {e1002542}, Year = {2016}, Publisher = {Public Library of Science San Francisco, CA USA} } @Article{ bajpai2019encouraging, Title = {Encouraging Reproducibility in Scientific Research of the Internet}, Author = {Bajpai, Vaibhav and Bonaventure, Olivier and Claffy, Kimberly and Karrenberg, Daniel}, Journal = {Dagstuhl reports}, Volume = {8}, Number = {10}, Year = {2019} } @Article{ rodriguez2018reproducibility, Title = {Reproducibility and credibility in empirical software engineering: A case study based on a systematic literature review of the use of the szz algorithm}, Author = {Rodr{\'\i}guez-P{\'e}rez, Gema and Robles, Gregorio and Gonz{\'a}lez-Barahona, Jes{\'u}s M}, Journal = {Information and Software Technology}, Volume = {99}, Pages = {164--176}, Year = {2018}, Publisher = {Elsevier} } @InProceedings{ amann2013software, Author = {Sven Amann and Stefanie Beyer and 
Katja Kevic and Harald C. Gall}, Editor = {Bertrand Meyer and Martin Nordio}, Title = {Software Mining Studies: Goals, Approaches, Artifacts, and Replicability}, BookTitle = {Software Engineering - International Summer Schools, {LASER} 2013-2014, Elba, Italy, Revised Tutorial Lectures}, Series = {Lecture Notes in Computer Science}, Volume = {8987}, Pages = {121--158}, Publisher = {Springer}, Year = {2014}, URL = {https://doi.org/10.1007/978-3-319-28406-4\_5}, DOI = {10.1007/978-3-319-28406-4\_5}, timestamp = {Tue, 14 May 2019 10:00:44 +0200}, biburl = {https://dblp.org/rec/conf/laser/AmannBKG14.bib}, bibsource = {dblp computer science bibliography, https://dblp.org} } @Article{ louridas2012note, Title = {A note on rigour and replicability}, Author = {Louridas, Panos and Gousios, Georgios}, Journal = {ACM SIGSOFT Software Engineering Notes}, Volume = {37}, Number = {5}, Pages = {1--4}, Year = {2012}, Publisher = {ACM New York, NY, USA} } @InProceedings{ gundersen2018state, Title = {State of the art: Reproducibility in artificial intelligence}, Author = {Gundersen, Odd Erik and Kjensmo, Sigbj{\o}rn}, BookTitle = {Proceedings of the AAAI Conference on Artificial Intelligence}, Volume = {32}, Number = {1}, Year = {2018} } @Article{ feitelson2015repeatability, Title = {From repeatability to reproducibility and corroboration}, Author = {Feitelson, Dror G}, Journal = {ACM SIGOPS Operating Systems Review}, Volume = {49}, Number = {1}, Pages = {3--11}, Year = {2015}, Publisher = {ACM New York, NY, USA} } @InProceedings{ yen2021ucsd, Title = {A {UCSD} view on replication and reproducibility for {CPS} \& {IoT}}, Author = {Yen, Alex and Flowers, Bryse and Luo, Wenshan and Nagesh, Nitish and Tueller, Peter and Kastner, Ryan and Pannuto, Pat}, BookTitle = {Proceedings of the Workshop on Benchmarking Cyber-Physical Systems and Internet of Things}, Pages = {20--25}, Year = {2021} } @InProceedings{ bajpai2017challenges, Title = {Challenges with reproducibility}, Author = {Bajpai, 
Vaibhav and K{\"u}hlewind, Mirja and Ott, J{\"o}rg and Sch{\"o}nw{\"a}lder, J{\"u}rgen and Sperotto, Anna and Trammell, Brian}, BookTitle = {Proceedings of the Reproducibility Workshop}, Pages = {1--4}, Year = {2017} } @Article{ jaeger2019reproducible, Title = {Reproducible measurements of {TCP BBR} congestion control}, Author = {Jaeger, Benedikt and Scholz, Dominik and Raumer, Daniel and Geyer, Fabien and Carle, Georg}, Journal = {Computer Communications}, Volume = {144}, Pages = {31--43}, Year = {2019}, Publisher = {Elsevier} } @Article{ papadopoulos2019methodological, Title = {Methodological principles for reproducible performance evaluation in cloud computing}, Author = {Papadopoulos, Alessandro Vittorio and Versluis, Laurens and Bauer, Andr{\'e} and Herbst, Nikolas and Von Kistowski, J{\'o}akim and Ali-Eldin, Ahmed and Abad, Cristina and Amaral, Jos{\'e} Nelson and T{\r{u}}ma, Petr and Iosup, Alexandru}, Journal = {IEEE Transactions on Software Engineering}, Year = {2019}, Publisher = {IEEE} } @Book{ kernighan2019unixhistory, Title = {{UNIX}: A History and a Memoir}, Author = {Kernighan, Brian W}, Year = {2019}, Publisher = {Independently published}, Note = {\url{https://www.cs.princeton.edu/~bwk/memoir.html}, accessed 2022-01-10} } @TechReport{ david2003floss, Title = {{FLOSS-US} the free/libre/open source software survey for 2003}, Author = {David, Paul A and Waterman, Andrew and Arora, Seema}, Institution = {Stanford Institute for Economic Policy Research}, Pages = {1--39}, Year = {2003} } @InCollection{ ghosh2005understanding, Title = {Understanding free software developers: Findings from the {FLOSS} study}, Author = {Ghosh, Rishab Aiyer}, BookTitle = {Perspectives on free and open source software}, Volume = {28}, Pages = {23--47}, Year = {2005}, Publisher = {{MIT} Press} } @Article{ wachs2021ossgeography, Author = {Johannes Wachs and Mariusz Nitecki and William Schueller and Axel Polleres}, Title = {The Geography of Open Source Software: Evidence from 
{GitHub}}, Journal = {CoRR}, Volume = {abs/2107.03200}, Year = {2021}, URL = {https://arxiv.org/abs/2107.03200}, EPrintType = {arXiv}, EPrint = {2107.03200}, timestamp = {Thu, 14 Oct 2021 09:13:55 +0200}, biburl = {https://dblp.org/rec/journals/corr/abs-2107-03200.bib}, bibsource = {dblp computer science bibliography, https://dblp.org} } @Article{ barahona2008geodiversity, Author = {Jes{\'{u}}s M. Gonz{\'{a}}lez{-}Barahona and Gregorio Robles and Roberto Andradas{-}Izquierdo and Rishab Aiyer Ghosh}, Title = {Geographic origin of libre software developers}, Journal = {Inf. Econ. Policy}, Volume = {20}, Number = {4}, Pages = {356--363}, Year = {2008}, URL = {https://doi.org/10.1016/j.infoecopol.2008.07.001}, DOI = {10.1016/j.infoecopol.2008.07.001}, timestamp = {Sat, 22 Feb 2020 13:47:15 +0100}, biburl = {https://dblp.org/rec/journals/iepol/Gonzalez-BarahonaRAG08.bib}, bibsource = {dblp computer science bibliography, https://dblp.org} } @Article{ daniel2013ossdiversity, Author = {Sherae L. Daniel and Ritu Agarwal and Katherine J. Stewart}, Title = {The Effects of Diversity in Global, Distributed Collectives: {A} Study of Open Source Project Success}, Journal = {Inf. Syst. Res.}, Volume = {24}, Number = {2}, Pages = {312--333}, Year = {2013}, URL = {https://doi.org/10.1287/isre.1120.0435}, DOI = {10.1287/isre.1120.0435}, timestamp = {Fri, 13 Mar 2020 14:34:54 +0100}, biburl = {https://dblp.org/rec/journals/isr/DanielAS13.bib}, bibsource = {dblp computer science bibliography, https://dblp.org} } @InProceedings{ rastogi2016geobias, Author = {Ayushi Rastogi}, Editor = {Laura K. Dillon and Willem Visser and Laurie A. 
Williams}, Title = {Do biases related to geographical location influence work-related decisions in {GitHub}?}, BookTitle = {Proceedings of the 38th International Conference on Software Engineering, {ICSE} 2016, Austin, TX, USA, May 14-22, 2016 - Companion Volume}, Pages = {665--667}, Publisher = {{ACM}}, Year = {2016}, URL = {https://doi.org/10.1145/2889160.2891035}, DOI = {10.1145/2889160.2891035}, timestamp = {Tue, 10 Aug 2021 14:29:45 +0200}, biburl = {https://dblp.org/rec/conf/icse/Rastogi16.bib}, bibsource = {dblp computer science bibliography, https://dblp.org} } @InProceedings{ rastogi2018geobias, Author = {Ayushi Rastogi and Nachiappan Nagappan and Georgios Gousios and Andr{\'{e}} van der Hoek}, Editor = {Markku Oivo and Daniel M{\'{e}}ndez Fern{\'{a}}ndez and Audris Mockus}, Title = {Relationship between geographical location and evaluation of developer contributions in {GitHub}}, BookTitle = {Proceedings of the 12th {ACM/IEEE} International Symposium on Empirical Software Engineering and Measurement, {ESEM} 2018, Oulu, Finland, October 11-12, 2018}, Pages = {22:1--22:8}, Publisher = {{ACM}}, Year = {2018}, URL = {https://doi.org/10.1145/3239235.3240504}, DOI = {10.1145/3239235.3240504}, timestamp = {Wed, 21 Nov 2018 12:44:13 +0100}, biburl = {https://dblp.org/rec/conf/esem/RastogiNGH18.bib}, bibsource = {dblp computer science bibliography, https://dblp.org} } @Misc{ takhteyev2010ossgeography, Author = {Yuri Takhteyev and Andrew Hilts}, Title = {Investigating the geography of open source software through {GitHub}}, Year = {2010}, HowPublished = {\url{https://flosshub.org/sites/flosshub.org/files/Takhteyev-Hilts-2010.pdf}} } @Article{ prana2021geogenderdiversity, Author = {Prana, Gede Artha Azriadi and Ford, Denae and Rastogi, Ayushi and Lo, David and Purandare, Rahul and Nagappan, Nachiappan}, Journal = {IEEE Transactions on Software Engineering}, Title = {Including Everyone, Everywhere: Understanding Opportunities and Challenges of Geographic 
Gender-Inclusion in {OSS}}, Year = {2021}, Note = {to appear}, DOI = {10.1109/TSE.2021.3092813} } @InProceedings{ herbsleb2007globalsweng, Author = {James D. Herbsleb}, Editor = {Lionel C. Briand and Alexander L. Wolf}, Title = {Global Software Engineering: The Future of Socio-technical Coordination}, BookTitle = {International Conference on Software Engineering, {ICSE} 2007, Workshop on the Future of Software Engineering, {FOSE} 2007, May 23-25, 2007, Minneapolis, MN, {USA}}, Pages = {188--198}, Publisher = {{IEEE} Computer Society}, Year = {2007}, URL = {https://doi.org/10.1109/FOSE.2007.11}, DOI = {10.1109/FOSE.2007.11}, timestamp = {Wed, 16 Oct 2019 14:14:49 +0200}, biburl = {https://dblp.org/rec/conf/icse/Herbsleb07.bib}, bibsource = {dblp computer science bibliography, https://dblp.org} } @InProceedings{ holmstrom2006globaldev, Author = {Helena Holmstr{\"{o}}m and Eoin {\'{O}} Conch{\'{u}}ir and P{\"{a}}r J. {\AA}gerfalk and Brian Fitzgerald}, Title = {Global Software Development Challenges: {A} Case Study on Temporal, Geographical and Socio-Cultural Distance}, BookTitle = {1st {IEEE} International Conference on Global Software Engineering, {ICGSE} 2006, Florianopolis, Brazil, October 2006}, Pages = {3--11}, Publisher = {{IEEE} Computer Society}, Year = {2006}, URL = {https://doi.org/10.1109/ICGSE.2006.261210}, DOI = {10.1109/ICGSE.2006.261210}, timestamp = {Wed, 16 Oct 2019 14:14:50 +0200}, biburl = {https://dblp.org/rec/conf/icgse/HolmstromCAF06.bib}, bibsource = {dblp computer science bibliography, https://dblp.org} } + +@Article{ ieee-sw-repro-builds, + Author = {Chris Lamb and Stefano Zacchiroli}, + Title = {Reproducible Builds: Increasing the Integrity of Software + Supply Chains}, + Abstract = {Although it is possible to increase confidence in Free and + Open Source Software (FOSS) by reviewing its source code, + trusting code is not the same as trusting its executable + counterparts. 
These are typically built and distributed by + third-party vendors, with severe security consequences if + their supply chains are compromised. In this paper, we + present reproducible builds, an approach that can determine + whether generated binaries correspond with their original + source code. We first define the problem, and then provide + insight into the challenges of making real-world software + build in a "reproducible" manner---this is, when every build + generates bit-for-bit identical results. Through the + experience of the Reproducible Builds project making the + Debian Linux distribution reproducible, we also describe + the affinity between reproducibility and quality assurance + (QA).}, + Publisher = {IEEE Computer Society}, + Year = {2022}, + ISSN = {0740-7459}, + DOI = {10.1109/MS.2021.3073045}, + Pages = {62--70}, + Volume = {39}, + Number = {2}, + Journal = {IEEE Software} +} + +@InProceedings{ esem-2022-msr-artifacts, + Author = {Abou Khalil, Zeinab and Stefano Zacchiroli}, + Title = {Software Artifact Mining in Software Engineering + Conferences: A Meta-Analysis}, + Abstract = {Background: Software development results in the production + of various types of artifacts: source code, version control + system metadata, bug reports, mailing list conversations, + test data, etc. Empirical software engineering (ESE) has + thrived mining those artifacts to uncover the inner + workings of software development and improve its practices. + But which artifacts are studied in the field is a moving + target, which we study empirically in this paper. Aims: We + quantitatively characterize the most frequently mined and + co-mined software artifacts in ESE research and the + research purposes they support. Method: We conduct a + meta-analysis of artifact mining studies published in 11 + top conferences in ESE, for a total of 9621 papers. 
We use + natural language processing (NLP) techniques to + characterize the types of software artifacts that are most + often mined and their evolution over a 16-year period + (2004-2020). We analyze the combinations of artifact types + that are most often mined together, as well as the + relationship between study purposes and mined artifacts. + Results: We find that: (1) mining happens in the vast + majority of analyzed papers, (2) source code and test data + are the most mined artifacts, (3) there is an increasing + interest in mining novel artifacts, together with source + code, (4) researchers are most interested in the evaluation + of software systems and use all possible empirical signals + to support that goal.}, + Publisher = {ACM}, + Year = {2022}, + DOI = {10.1145/3544902.3546239}, + BookTitle = {ACM/IEEE International Symposium on Empirical Software + Engineering and Measurement (ESEM 2022)} +} + +@InProceedings{ msr-2022-foss-licenses, + Author = {Stefano Zacchiroli}, + Title = {A Large-scale Dataset of (Open Source) License Text + Variants}, + Abstract = {We introduce a large-scale dataset of the complete texts + of free/open source software (FOSS) license variants. To + assemble it we have collected from the Software Heritage + archive—the largest publicly available archive of + FOSS source code with accompanying development + history—all versions of files whose names are + commonly used to convey licensing terms to software users + and developers. The dataset consists of 6.5 million unique + license files that can be used to conduct empirical studies + on open source licensing, training of automated license + classifiers, natural language processing (NLP) analyses of + legal texts, as well as historical and phylogenetic studies + on FOSS licensing. 
Additional metadata about shipped + license files are also provided, making the dataset ready + to use in various contexts; they include: file length + measures, detected MIME type, detected SPDX license (using + ScanCode), example origin (e.g., GitHub repository), oldest + public commit in which the license appeared. The dataset is + released as open data as an archive file containing all + deduplicated license files, plus several portable CSV files + for metadata, referencing files via cryptographic + checksums.}, + Publisher = {ACM}, + Year = {2022}, + DOI = {10.1145/3524842.3528491}, + Pages = {757-761}, + BookTitle = {The 2022 Mining Software Repositories Conference (MSR + 2022)} +} + +@InProceedings{ msr-2022-foss-geography, + Author = {Davide Rossi and Stefano Zacchiroli}, + Title = {Geographic Diversity in Public Code Contributions: An + Exploratory Large-Scale Study Over 50 Years}, + Abstract = {We conduct an exploratory, large-scale, longitudinal study + of 50 years of commits to publicly available version + control system repositories, in order to characterize the + geographic diversity of contributors to public code and its + evolution over time. We analyze in total 2.2 billion + commits collected by Software Heritage from 160 million + projects and authored by 43 million authors during the + 1971–2021 time period. We geolocate developers to 12 + world regions derived from the United Nation geoscheme, + using as signals email top-level domains, author names + compared with names distributions around the world, and UTC + offsets mined from commit metadata. We find evidence of the + early dominance of North America in open source software, + later joined by Europe. After that period, the geographic + diversity in public code has been constantly increasing. 
We + also identify relevant historical shifts related to the + UNIX wars, the increase of coding literacy in Central and + South Asia, and broader phenomena like colonialism and + people movement across countries + (immigration/emigration).}, + Publisher = {ACM}, + Year = {2022}, + DOI = {10.1145/3524842.3528471}, + Pages = {80-85}, + BookTitle = {The 2022 Mining Software Repositories Conference (MSR + 2022)} +} + +@InProceedings{ msr-2022-swe-general-index, + Author = {Abou Khalil, Zeinab and Stefano Zacchiroli}, + Title = {The General Index of Software Engineering Papers}, + Abstract = {We introduce the General Index of Software Engineering + Papers, a dataset of fulltext-indexed papers from the most + prominent scientific venues in the field of Software + Engineering. The dataset includes both complete + bibliographic information and indexed n-grams (sequence of + contiguous words after removal of stopwords and non-words, + for a total of 577 276 382 unique n-grams in this release) + with length 1 to 5 for 44 581 papers retrieved from 34 + venues over the 1971–2020 period. The dataset serves + use cases in the field of meta-research, allowing to + introspect the output of software engineering research even + when access to papers or scholarly search engines is not + possible (e.g., due to contractual reasons). The dataset + also contributes to making such analyses reproducible and + independently verifiable, as opposed to what happens when + they are conducted using 3rd-party and non-open scholarly + indexing services. 
The dataset is available as a portable + Postgres database dump and released as open data.}, + Publisher = {ACM}, + Year = {2022}, + DOI = {10.1145/3524842.3528494}, + Pages = {98--102}, + BookTitle = {The 2022 Mining Software Repositories Conference (MSR + 2022)} +} + +@InProceedings{ icse-seis-2022-gender, + Author = {Davide Rossi and Stefano Zacchiroli}, + Title = {Worldwide Gender Differences in Public Code Contributions + (and How They Have Been Affected by the {COVID-19} + Pandemic)}, + Abstract = {Gender imbalance is a well-known phenomenon observed + throughout sciences which is particularly severe in + software development and Free/Open Source Software + communities. Little is known yet about the geography of this + phenomenon in particular when considering large scales for + both its time and space dimensions. We contribute to fill + this gap with a longitudinal study of the population of + contributors to publicly available software source code. We + analyze the development history of 160 million software + projects for a total of 2.2 billion commits contributed by + 43 million distinct authors over a period of 50 years. We + classify author names by gender using name frequencies and + author geographical locations using heuristics based on + email addresses and time zones. We study the evolution over + time of contributions to public code by gender and by world + region. For the world overall, we confirm previous findings + about the low but steadily increasing ratio of + contributions by female authors. When breaking down by + world regions we find that the long-term growth of female + participation is a world-wide phenomenon. 
We also observe a + decrease in the ratio of female participation during the + COVID-19 pandemic, suggesting that women's ability + to contribute to public code has been more hindered than + that of men.}, + Publisher = {ACM}, + Year = {2022}, + DOI = {10.1109/ICSE-SEIS55304.2022.9794118}, + Pages = {172--183}, + BookTitle = {44th International Conference on Software Engineering + (ICSE 2022) - Software Engineering in Society (SEIS) + Track} +} + +@InProceedings{ msr-2020-topology, + Author = {Antoine Pietri and Guillaume Rousseau and Stefano + Zacchiroli}, + Title = {Determining the Intrinsic Structure of Public Software + Development History}, + Abstract = {Background: Collaborative software development has + produced a wealth of version control system (VCS) data that + can now be analyzed in full. Little is known about the + intrinsic structure of the entire corpus of publicly + available VCS as an interconnected graph. Understanding its + structure is needed to determine the best approach to + analyze it in full and to avoid methodological pitfalls + when doing so. Objective: We intend to determine the most + salient network topology properties of public software + development history as captured by VCS. We will explore: + degree distributions, determining whether they are + scale-free or not; distribution of connected component sizes; + distribution of shortest path lengths. Method: We will use + Software Heritage---which is the largest corpus of public + VCS data---compress it using webgraph compression + techniques, and analyze it in-memory using classic graph + algorithms. Analyses will be performed both on the full + graph and on relevant subgraphs. Limitations: The study is + exploratory in nature; as such no hypotheses on the + findings is stated at this time. Chosen graph algorithms + are expected to scale to the corpus size, but it will need + to be confirmed experimentally. 
External validity will + depend on how representative Software Heritage is of the + software commons.}, + Publisher = {IEEE}, + Year = {2020}, + DOI = {10.1145/3379597.3387506}, + Pages = {602--605}, + BookTitle = {MSR 2020: The 17th International Conference on Mining + Software Repositories} +} + +@InProceedings{ msr-2020-forks, + Author = {Antoine Pietri and Guillaume Rousseau and Stefano + Zacchiroli}, + Title = {Forking Without Clicking: on How to Identify Software + Repository Forks}, + Abstract = {The notion of software "fork" has been shifting over time + from the (negative) phenomenon of community disagreements + that result in the creation of separate development lines + and ultimately software products, to the (positive) + practice of using distributed version control system (VCS) + repositories to collaboratively improve a single product + without stepping on each other's toes. In both cases the VCS + repositories participating in a fork share parts of a + common development history. Studies of software forks + generally rely on hosting platform metadata, such as + GitHub, as the source of truth for what constitutes a fork. + These "forge forks" however can only identify as forks + repositories that have been created on the platform, e.g., + by clicking a "fork" button on the platform user interface. + The increased diversity in code hosting platforms (e.g., + GitLab) and the habits of significant development + communities (e.g., the Linux kernel, which is not primarily + hosted on any single platform) call into question the + reliability of trusting code hosting platforms to identify + forks. Doing so might introduce selection and + methodological biases in empirical studies. In this article + we explore various definitions of "software forks", trying + to capture forking workflows that exist in the real world. 
+ We quantify the differences in how many repositories would + be identified as forks on GitHub according to the various + definitions, confirming that a significant number could be + overlooked by only considering forge forks. We study the + structure and size of fork networks, observing how they are + affected by the proposed definitions, and discuss the + potential impact on empirical research.}, + Publisher = {IEEE}, + Year = {2020}, + DOI = {10.1145/3379597.3387450}, + Pages = {277--287}, + BookTitle = {MSR 2020: The 17th International Conference on Mining + Software Repositories} +}