Page MenuHomeSoftware Heritage

bib-rapport.bib
No OneTemporary

bib-rapport.bib

@misc{Aylien16,
  author        = {Aylien},
  title         = {Source Code Classification Using Deep Learning [blog post]},
  howpublished  = {\url{http://blog.aylien.com/source-code-classification-using-deep-learning/}},
  month         = aug,
  year          = {2016},
  keywords      = {data science, research},
  date-added    = {2018-02-17 20:56:11 +0000},
  date-modified = {2018-02-17 21:00:33 +0000},
}
@misc{universal-ctags,
  author        = {{Universal Ctags Team}},
  title         = {Universal Ctags},
  howpublished  = {\url{http://ctags.io/}},
  year          = {2001--2018},
  date-added    = {2018-02-17 20:53:07 +0000},
  date-modified = {2018-02-17 20:54:44 +0000},
}
@misc{sloccount,
  author        = {Wheeler, David A.},
  title         = {SLOCCount},
  howpublished  = {\url{https://www.dwheeler.com/sloccount/}},
  year          = {2004--2018},
  date-added    = {2018-02-17 20:47:15 +0000},
  date-modified = {2018-02-17 20:51:51 +0000},
}
@misc{cloc,
  author        = {Danial, Al},
  title         = {cloc},
  howpublished  = {\url{https://github.com/AlDanial/cloc}},
  year          = {2006--2018},
  date-added    = {2018-02-17 20:46:02 +0000},
  date-modified = {2018-02-17 20:46:38 +0000},
}
@misc{guesslang,
  author        = {Somda, Y.},
  title         = {Guesslang},
  howpublished  = {\url{http://guesslang.readthedocs.io/}},
  year          = {2017--2018},
  date-added    = {2018-02-17 20:27:54 +0000},
  date-modified = {2018-02-17 20:43:42 +0000},
}
@misc{linguist,
  author        = {{GitHub}},
  title         = {Linguist},
  howpublished  = {\url{https://github.com/github/linguist}},
  year          = {2011--2018},
  date-added    = {2018-02-17 20:21:27 +0000},
  date-modified = {2018-02-17 20:26:46 +0000},
}
@misc{ohcount,
  author        = {{Black Duck Software}},
  title         = {Ohcount},
  howpublished  = {\url{https://github.com/blackducksoftware/ohcount}},
  year          = {2008--2018},
  date-added    = {2018-02-17 20:11:31 +0000},
  date-modified = {2018-02-17 21:03:52 +0000},
}
@inproceedings{vanDam16,
  author     = {van Dam, J. K. and Zaytsev, V.},
  title      = {Software Language Identification with Natural Language Classifiers},
  booktitle  = {2016 IEEE 23rd International Conference on Software Analysis, Evolution, and Reengineering (SANER)},
  volume     = {1},
  pages      = {624--628},
  month      = mar,
  year       = {2016},
  doi        = {10.1109/SANER.2016.92},
  keywords   = {meta data;natural language processing;pattern classification;program diagnostics;software maintenance;text analysis;embedded code fragments;file extensions;grammar-based text analysis;keyword search;legacy code analysis;multinominal naïve Bayes;n-grams;natural language classifiers;natural language processing field;normalised compression distance;skip-grams;software artefact metadata;software language identification;statistical language models;universal IDE support;Cascading style sheets;HTML;Java;Natural languages;Software;Training;Training data;language identification;natural language processing;software language engineering},
  bdsk-url-1 = {http://dx.doi.org/10.1109/SANER.2016.92},
}
@article{Klein11,
  author        = {Klein, David and Murray, Kyle and Weber, Simon},
  title         = {Algorithmic Programming Language Identification},
  journal       = {CoRR},
  volume        = {abs/1106.4064},
  year          = {2011},
  archiveprefix = {arXiv},
  eprint        = {1106.4064},
  url           = {http://arxiv.org/abs/1106.4064},
  timestamp     = {Wed, 07 Jun 2017 14:41:07 +0200},
  biburl        = {http://dblp.org/rec/bib/journals/corr/abs-1106-4064},
  bibsource     = {dblp computer science bibliography, http://dblp.org},
  bdsk-url-1    = {http://arxiv.org/abs/1106.4064},
}
@inproceedings{Gilda17,
  author     = {Gilda, S.},
  title      = {Source code classification using Neural Networks},
  booktitle  = {2017 14th International Joint Conference on Computer Science and Software Engineering (JCSSE)},
  pages      = {1--6},
  month      = jul,
  year       = {2017},
  doi        = {10.1109/JCSSE.2017.8025917},
  keywords   = {feature extraction;learning (artificial intelligence);neural nets;pattern classification;programming languages;software engineering;source code (software);artificial neural network;convolutional neural network;file extension;intelligent feature extraction;multilayer neural network;neural networks;programming languages;software development industry;source code classification;supervised learning;word embedding layers;Feature extraction;HTML;Syntactics;Training;Artificial neural network;Feature extraction;Multi-layer neural network;Supervised learning},
  bdsk-url-1 = {http://dx.doi.org/10.1109/JCSSE.2017.8025917},
}
@article{Zevin17,
  author        = {Zevin, Shaul and Holzem, Catherine},
  title         = {Machine Learning Based Source Code Classification Using Syntax Oriented Features},
  journal       = {CoRR},
  volume        = {abs/1703.07638},
  year          = {2017},
  archiveprefix = {arXiv},
  eprint        = {1703.07638},
  url           = {http://arxiv.org/abs/1703.07638},
  timestamp     = {Wed, 07 Jun 2017 14:41:28 +0200},
  biburl        = {http://dblp.org/rec/bib/journals/corr/ZevinH17},
  bibsource     = {dblp computer science bibliography, http://dblp.org},
  bdsk-url-1    = {http://arxiv.org/abs/1703.07638},
}
@inproceedings{Ugurel02,
  author     = {Ugurel, Secil and Krovetz, Robert and Giles, C. Lee},
  title      = {What's the Code?: Automatic Classification of Source Code Archives},
  booktitle  = {Proceedings of the Eighth ACM SIGKDD International Conference on Knowledge Discovery and Data Mining},
  series     = {KDD '02},
  location   = {Edmonton, Alberta, Canada},
  pages      = {632--638},
  numpages   = {7},
  year       = {2002},
  publisher  = {ACM},
  address    = {New York, NY, USA},
  isbn       = {1-58113-567-X},
  doi        = {10.1145/775047.775141},
  acmid      = {775141},
  url        = {http://doi.acm.org/10.1145/775047.775141},
  bdsk-url-1 = {http://doi.acm.org/10.1145/775047.775141},
  bdsk-url-2 = {http://dx.doi.org/10.1145/775047.775141},
}
@inproceedings{Wang15,
  author    = {Wang, Peng and Xu, Jiaming and Xu, Bo and Liu, Cheng{-}Lin and Zhang, Heng and Wang, Fangyuan and Hao, Hongwei},
  title     = {Semantic Clustering and Convolutional Neural Network for Short Text Categorization},
  booktitle = {Proceedings of the 53rd Annual Meeting of the Association for Computational Linguistics and the 7th International Joint Conference on Natural Language Processing of the Asian Federation of Natural Language Processing, {ACL} 2015, July 26-31, 2015, Beijing, China, Volume 2: Short Papers},
  pages     = {352--357},
  year      = {2015},
  url       = {http://aclweb.org/anthology/P/P15/P15-2058.pdf},
  timestamp = {Mon, 03 Aug 2015 08:13:34 +0200},
  biburl    = {http://dblp.org/rec/bib/conf/acl/WangXXLZWH15},
  bibsource = {dblp computer science bibliography, http://dblp.org},
}
@inproceedings{Khasnabish14,
  author    = {Khasnabish, Jyotiska Nath and Sodhi, Mitali and Deshmukh, Jayati and Srinivasaraghavan, G.},
  title     = {Detecting Programming Language from Source Code Using {Bayesian} Learning Techniques},
  booktitle = {Machine Learning and Data Mining in Pattern Recognition - 10th International Conference, {MLDM} 2014, St. Petersburg, Russia, July 21-24, 2014. Proceedings},
  pages     = {513--522},
  year      = {2014},
  doi       = {10.1007/978-3-319-08979-9_39},
  url       = {https://doi.org/10.1007/978-3-319-08979-9_39},
  timestamp = {Wed, 17 May 2017 14:25:11 +0200},
  biburl    = {http://dblp.org/rec/bib/conf/mldm/KhasnabishSDS14},
  bibsource = {dblp computer science bibliography, http://dblp.org},
}
@misc{Heres16,
  author        = {Heres, Dani{\"e}l},
  title         = {Detecting the Programming Language of Source Code Snippets using Machine Learning and Neural Networks [blog post]},
  howpublished  = {\url{http://blog.aylien.com/source-code-classification-using-deep-learning/}},
  month         = jul,
  year          = {2016},
  internal-note = {NOTE(review): the howpublished URL is identical to the one in entry Aylien16 and points to the Aylien blog, not to a post by this author; presumably a copy-paste error. Replace it with the actual URL of this post once confirmed.},
}
@incollection{Aggarwal12,
  author    = {Aggarwal, Charu C. and Zhai, ChengXiang},
  title     = {A Survey of Text Classification Algorithms},
  editor    = {Aggarwal, Charu C. and Zhai, ChengXiang},
  booktitle = {Mining Text Data},
  pages     = {163--222},
  year      = {2012},
  publisher = {Springer US},
  address   = {Boston, MA},
  isbn      = {978-1-4614-3223-4},
  doi       = {10.1007/978-1-4614-3223-4_6},
  url       = {https://doi.org/10.1007/978-1-4614-3223-4_6},
  abstract  = {The problem of classification has been widely studied in the data mining, machine learning, database, and information retrieval communities with applications in a number of diverse domains, such as target marketing, medical diagnosis, news group filtering, and document organization. In this paper we will provide a survey of a wide variety of text classification algorithms.},
}
@article{Chen09,
  author   = {Chen, Jingnian and Huang, Houkuan and Tian, Shengfeng and Qu, Youli},
  title    = {Feature selection for text classification with Na{\"i}ve {Bayes}},
  journal  = {Expert Systems with Applications},
  volume   = {36},
  number   = {3, Part 1},
  pages    = {5432--5435},
  year     = {2009},
  issn     = {0957-4174},
  doi      = {10.1016/j.eswa.2008.06.054},
  url      = {http://www.sciencedirect.com/science/article/pii/S0957417408003564},
  keywords = {Text classification, Feature selection, Text preprocessing, Naïve Bayes},
}
@misc{MLatB16,
  author       = {{Machine Learning at Berkeley}},
  title        = {Github Programming Language Classification [blog post]},
  howpublished = {\url{https://ml.berkeley.edu/blog/2016/12/03/github/}},
  month        = dec,
  year         = {2016},
  keywords     = {data science, research},
}
@inproceedings{Cavnar94,
  author    = {Cavnar, William B. and Trenkle, John M.},
  title     = {{N}-Gram-Based Text Categorization},
  booktitle = {Proceedings of {SDAIR}-94, 3rd Annual Symposium on Document Analysis and Information Retrieval},
  address   = {Las Vegas, NV, USA},
  pages     = {161--175},
  year      = {1994},
}
@article{Kim15,
  author        = {Kim, Yoon and Jernite, Yacine and Sontag, David and Rush, Alexander M.},
  title         = {Character-Aware Neural Language Models},
  journal       = {CoRR},
  volume        = {abs/1508.06615},
  year          = {2015},
  archiveprefix = {arXiv},
  eprint        = {1508.06615},
  url           = {http://arxiv.org/abs/1508.06615},
  timestamp     = {Wed, 07 Jun 2017 14:41:17 +0200},
  biburl        = {https://dblp.org/rec/bib/journals/corr/KimJSR15},
  bibsource     = {dblp computer science bibliography, https://dblp.org},
}
@article{Kim14,
  author        = {Kim, Yoon},
  title         = {Convolutional Neural Networks for Sentence Classification},
  journal       = {CoRR},
  volume        = {abs/1408.5882},
  year          = {2014},
  archiveprefix = {arXiv},
  eprint        = {1408.5882},
  url           = {http://arxiv.org/abs/1408.5882},
  timestamp     = {Wed, 07 Jun 2017 14:40:07 +0200},
  biburl        = {https://dblp.org/rec/bib/journals/corr/Kim14f},
  bibsource     = {dblp computer science bibliography, https://dblp.org},
}
@inproceedings{kenlm,
  author    = {Heafield, Kenneth},
  title     = {{KenLM:} Faster and Smaller Language Model Queries},
  booktitle = {Proceedings of the {EMNLP} 2011 Sixth Workshop on Statistical Machine Translation},
  address   = {Edinburgh, Scotland, United Kingdom},
  pages     = {187--197},
  month     = jul,
  year      = {2011},
  url       = {https://kheafield.com/papers/avenue/kenlm.pdf},
}
@article{scikit-learn,
  author  = {Pedregosa, F. and Varoquaux, G. and Gramfort, A. and Michel, V. and Thirion, B. and Grisel, O. and Blondel, M. and Prettenhofer, P. and Weiss, R. and Dubourg, V. and Vanderplas, J. and Passos, A. and Cournapeau, D. and Brucher, M. and Perrot, M. and Duchesnay, E.},
  title   = {Scikit-learn: Machine Learning in {P}ython},
  journal = {Journal of Machine Learning Research},
  volume  = {12},
  pages   = {2825--2830},
  year    = {2011},
}
@misc{keras,
  author       = {Chollet, Fran{\c{c}}ois and others},
  title        = {Keras},
  howpublished = {\url{https://keras.io}},
  year         = {2015},
}
@misc{tensorflow2015-whitepaper,
  author       = {Mart{\'\i}n~Abadi and
                  Ashish~Agarwal and
                  Paul~Barham and
                  Eugene~Brevdo and
                  Zhifeng~Chen and
                  Craig~Citro and
                  Greg~S.~Corrado and
                  Andy~Davis and
                  Jeffrey~Dean and
                  Matthieu~Devin and
                  Sanjay~Ghemawat and
                  Ian~Goodfellow and
                  Andrew~Harp and
                  Geoffrey~Irving and
                  Michael~Isard and
                  Yangqing Jia and
                  Rafal~Jozefowicz and
                  Lukasz~Kaiser and
                  Manjunath~Kudlur and
                  Josh~Levenberg and
                  Dandelion~Man{\'e} and
                  Rajat~Monga and
                  Sherry~Moore and
                  Derek~Murray and
                  Chris~Olah and
                  Mike~Schuster and
                  Jonathon~Shlens and
                  Benoit~Steiner and
                  Ilya~Sutskever and
                  Kunal~Talwar and
                  Paul~Tucker and
                  Vincent~Vanhoucke and
                  Vijay~Vasudevan and
                  Fernanda~Vi{\'e}gas and
                  Oriol~Vinyals and
                  Pete~Warden and
                  Martin~Wattenberg and
                  Martin~Wicke and
                  Yuan~Yu and
                  Xiaoqiang~Zheng},
  title        = {{TensorFlow}: Large-Scale Machine Learning on Heterogeneous Systems},
  howpublished = {\url{https://www.tensorflow.org/}},
  year         = {2015},
}
@article{Gepperth16,
  author     = {Gepperth, Alexander and Karaoguz, Cem},
  title      = {A Bio-Inspired Incremental Learning Architecture for Applied Perceptual Problems},
  journal    = {Cognitive Computation},
  volume     = {8},
  number     = {5},
  pages      = {924--934},
  day        = {01},
  month      = oct,
  year       = {2016},
  issn       = {1866-9964},
  doi        = {10.1007/s12559-016-9389-5},
  url        = {https://doi.org/10.1007/s12559-016-9389-5},
  abstract   = {We present a biologically inspired architecture for incremental learning that remains resource-efficient even in the face of very high data dimensionalities (>1000) that are typically associated with perceptual problems. In particular, we investigate how a new perceptual (object) class can be added to a trained architecture without retraining, while avoiding the well-known catastrophic forgetting effects typically associated with such scenarios. At the heart of the presented architecture lies a generative description of the perceptual space by a self-organized approach which at the same time approximates the neighborhood relations in this space on a two-dimensional plane. This approximation, which closely imitates the topographic organization of the visual cortex, allows an efficient local update rule for incremental learning even in the face of very high dimensionalities, which we demonstrate by tests on the well-known MNIST benchmark. We complement the model by adding a biologically plausible short-term memory system, allowing it to retain excellent classification accuracy even under incremental learning in progress. The short-term memory is additionally used to reinforce new data statistics by replaying previously stored samples during dedicated ``sleep'' phases.},
  bdsk-url-1 = {https://doi.org/10.1007/s12559-016-9389-5},
}
@article{RebuffiKL16,
  author        = {Rebuffi, Sylvestre{-}Alvise and Kolesnikov, Alexander and Lampert, Christoph H.},
  title         = {{iCaRL}: Incremental Classifier and Representation Learning},
  journal       = {CoRR},
  volume        = {abs/1611.07725},
  year          = {2016},
  archiveprefix = {arXiv},
  eprint        = {1611.07725},
  url           = {http://arxiv.org/abs/1611.07725},
  timestamp     = {Wed, 07 Jun 2017 14:42:11 +0200},
  biburl        = {https://dblp.org/rec/bib/journals/corr/RebuffiKL16},
  bibsource     = {dblp computer science bibliography, https://dblp.org},
}
@article{Kemker17,
  author        = {Kemker, Ronald and Kanan, Christopher},
  title         = {{FearNet}: Brain-Inspired Model for Incremental Learning},
  journal       = {CoRR},
  volume        = {abs/1711.10563},
  year          = {2017},
  archiveprefix = {arXiv},
  eprint        = {1711.10563},
  url           = {http://arxiv.org/abs/1711.10563},
  timestamp     = {Mon, 04 Dec 2017 18:34:59 +0100},
  biburl        = {https://dblp.org/rec/bib/journals/corr/abs-1711-10563},
  bibsource     = {dblp computer science bibliography, https://dblp.org},
}
@inproceedings{DiCosmo17,
  author    = {Di Cosmo, Roberto and Zacchiroli, Stefano},
  title     = {{Software Heritage}: Why and How to Preserve Software Source Code},
  booktitle = {iPRES 2017: 14th International Conference on Digital Preservation},
  year      = {2017},
  abstract  = {Software is now a key component present in all aspects of our society. Its preservation has attracted growing attention over the past years within the digital preservation community. We claim that source code ``the only representation of software that contains human readable knowledge'' is a precious digital object that needs special handling: it must be a first class citizen in the preservation landscape and we need to take action immediately, given the increasingly more frequent incidents that result in permanent losses of source code collections. In this paper we present Software Heritage, an ambitious initiative to collect, preserve, and share the entire corpus of publicly accessible software source code. We discuss the archival goals of the project, its use cases and role as a participant in the broader digital preservation ecosystem, and detail its key design decisions. We also report on the project road map and the current status of the Software Heritage archive that, as of early 2017, has collected more than 3 billion unique source code files and 700 million commits coming from more than 50 million software development projects.},
}

File Metadata

Mime Type
text/plain
Expires
Wed, Jun 4, 7:19 PM (2 d, 17 h ago)
Storage Engine
blob
Storage Format
Raw Data
Storage Handle
3399059

Event Timeline