@@ -9,15 +9,21 @@ public static partial class TransformSamples
99 {
1010 public static void Example ( )
1111 {
12- // Create a new ML context, for ML.NET operations. It can be used for exception tracking and logging,
13- // as well as the source of randomness.
12+ // Create a new ML context, for ML.NET operations. It can be used for
13+ // exception tracking and logging, as well as the source of randomness.
1414 var ml = new MLContext ( ) ;
1515
1616 // Get a small dataset as an IEnumerable and convert to IDataView.
1717 var data = new List < SampleSentimentData > ( ) {
18- new SampleSentimentData { Sentiment = true , SentimentText = "Best game I've ever played." } ,
19- new SampleSentimentData { Sentiment = false , SentimentText = "==RUDE== Dude, 2" } ,
20- new SampleSentimentData { Sentiment = true , SentimentText = "Until the next game, this is the best Xbox game!" } } ;
18+ new SampleSentimentData { Sentiment = true ,
19+ SentimentText = "Best game I've ever played." } ,
20+
21+ new SampleSentimentData { Sentiment = false ,
22+ SentimentText = "==RUDE== Dude, 2" } ,
23+
24+ new SampleSentimentData { Sentiment = true ,
25+ SentimentText = "Until the next game, " +
26+ "this is the best Xbox game!" } } ;
2127
2228 // Convert IEnumerable to IDataView.
2329 var trainData = ml . Data . LoadFromEnumerable ( data ) ;
@@ -29,23 +35,42 @@ public static void Example()
2935 // false ==RUDE== Dude, 2.
3036 // true Until the next game, this is the best Xbox game!
3137
32- // A pipeline to tokenize text as characters and then combine them together into n-grams
33- // The pipeline uses the default settings to featurize.
38+ // A pipeline to tokenize text as characters and then combine them
39+ // together into n-grams. The pipeline uses the default settings to
40+ // featurize.
41+
42+ var charsPipeline = ml . Transforms . Text
43+ . TokenizeIntoCharactersAsKeys ( "Chars" , "SentimentText" ,
44+ useMarkerCharacters : false ) ;
45+
46+ var ngramOnePipeline = ml . Transforms . Text
47+ . ProduceNgrams ( "CharsUnigrams" , "Chars" , ngramLength : 1 ) ;
3448
35- var charsPipeline = ml . Transforms . Text . TokenizeIntoCharactersAsKeys ( "Chars" , "SentimentText" , useMarkerCharacters : false ) ;
36- var ngramOnePipeline = ml . Transforms . Text . ProduceNgrams ( "CharsUnigrams" , "Chars" , ngramLength : 1 ) ;
37- var ngramTwpPipeline = ml . Transforms . Text . ProduceNgrams ( "CharsTwograms" , "Chars" ) ;
38- var oneCharsPipeline = charsPipeline . Append ( ngramOnePipeline ) ;
39- var twoCharsPipeline = charsPipeline . Append ( ngramTwpPipeline ) ;
49+ var ngramTwoPipeline = ml . Transforms . Text
50+ . ProduceNgrams ( "CharsTwograms" , "Chars" ) ;
51+
52+ var oneCharsPipeline = charsPipeline
53+ . Append ( ngramOnePipeline ) ;
54+
55+ var twoCharsPipeline = charsPipeline
56+ . Append ( ngramTwoPipeline ) ;
4057
4158 // The transformed data for pipelines.
42- var transformedData_onechars = oneCharsPipeline . Fit ( trainData ) . Transform ( trainData ) ;
43- var transformedData_twochars = twoCharsPipeline . Fit ( trainData ) . Transform ( trainData ) ;
59+ var transformedData_onechars = oneCharsPipeline . Fit ( trainData )
60+ . Transform ( trainData ) ;
61+
62+ var transformedData_twochars = twoCharsPipeline . Fit ( trainData )
63+ . Transform ( trainData ) ;
4464
4565 // Small helper to print the text inside the columns, in the console.
46- Action < string , IEnumerable < VBuffer < float > > , VBuffer < ReadOnlyMemory < char > > > printHelper = ( columnName , column , names ) =>
66+ Action < string , IEnumerable < VBuffer < float > > ,
67+ VBuffer < ReadOnlyMemory < char > > >
68+ printHelper = ( columnName , column , names ) =>
69+
4770 {
48- Console . WriteLine ( $ "{ columnName } column obtained post-transformation.") ;
71+ Console . WriteLine (
72+ $ "{ columnName } column obtained post-transformation.") ;
73+
4974 var slots = names . GetValues ( ) ;
5075 foreach ( var featureRow in column )
5176 {
@@ -54,21 +79,33 @@ public static void Example()
5479 Console . WriteLine ( "" ) ;
5580 }
5681
57- Console . WriteLine ( "===================================================" ) ;
82+ Console . WriteLine (
83+ "===================================================" ) ;
5884 } ;
59- // Preview of the CharsUnigrams column obtained after processing the input.
85+ // Preview of the CharsUnigrams column obtained after processing the
86+ // input.
6087 VBuffer < ReadOnlyMemory < char > > slotNames = default ;
61- transformedData_onechars . Schema [ "CharsUnigrams" ] . GetSlotNames ( ref slotNames ) ;
62- var charsOneGramColumn = transformedData_onechars . GetColumn < VBuffer < float > > ( transformedData_onechars . Schema [ "CharsUnigrams" ] ) ;
88+ transformedData_onechars . Schema [ "CharsUnigrams" ]
89+ . GetSlotNames ( ref slotNames ) ;
90+
91+ var charsOneGramColumn = transformedData_onechars
92+ . GetColumn < VBuffer < float > > ( transformedData_onechars
93+ . Schema [ "CharsUnigrams" ] ) ;
94+
6395 printHelper ( "CharsUnigrams" , charsOneGramColumn , slotNames ) ;
6496
6597 // CharsUnigrams column obtained post-transformation.
6698 // 'B' - 1 'e' - 6 's' - 1 't' - 1 '<?>' - 4 'g' - 1 'a' - 2 'm' - 1 'I' - 1 ''' - 1 'v' - 2 ...
6799 // 'e' - 1 '<?>' - 2 'd' - 1 '=' - 4 'R' - 1 'U' - 1 'D' - 2 'E' - 1 'u' - 1 ',' - 1 '2' - 1
68100 // 'B' - 0 'e' - 6 's' - 3 't' - 6 '<?>' - 9 'g' - 2 'a' - 2 'm' - 2 'I' - 0 ''' - 0 'v' - 0 ...
69101 // Preview of the CharsTwograms column obtained after processing the input.
70- var charsTwoGramColumn = transformedData_twochars . GetColumn < VBuffer < float > > ( transformedData_twochars . Schema [ "CharsTwograms" ] ) ;
71- transformedData_twochars . Schema [ "CharsTwograms" ] . GetSlotNames ( ref slotNames ) ;
102+ var charsTwoGramColumn = transformedData_twochars
103+ . GetColumn < VBuffer < float > > ( transformedData_twochars
104+ . Schema [ "CharsTwograms" ] ) ;
105+
106+ transformedData_twochars . Schema [ "CharsTwograms" ]
107+ . GetSlotNames ( ref slotNames ) ;
108+
72109 printHelper ( "CharsTwograms" , charsTwoGramColumn , slotNames ) ;
73110
74111 // CharsTwograms column obtained post-transformation.
@@ -78,7 +115,8 @@ public static void Example()
78115 }
79116
80117 /// <summary>
81- /// A dataset that contains a tweet and the sentiment assigned to that tweet: 0 - negative and 1 - positive sentiment.
118+ /// A dataset that contains a tweet and the sentiment assigned to that
119+ /// tweet: 0 - negative and 1 - positive sentiment.
82120 /// </summary>
83121 public class SampleSentimentData
84122 {
0 commit comments